src/codeconv.c

   1 /*
   2  * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
   3  * Copyright (C) 1999-2004 Hiroyuki Yamamoto
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 2 of the License, or
   8  * (at your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License
  16  * along with this program; if not, write to the Free Software
  17  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  18  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 #  include "config.h"
  22 #endif
  23
  24 #include <glib.h>
  25 #include <string.h>
  26 #include <ctype.h>
  27 #include <stdlib.h>
  28 #include <errno.h>
  29
  30 #if HAVE_LOCALE_H
  31 #  include <locale.h>
  32 #endif
  33
  34 #if HAVE_ICONV
  35 #  include <iconv.h>
  36 #endif
  37
  38 #include "intl.h"
  39 #include "codeconv.h"
  40 #include "unmime.h"
  41 #include "base64.h"
  42 #include "quoted-printable.h"
  43 #include "utils.h"
  44 #include "prefs_common.h"
  45
  46 typedef enum
  47 {
  48         JIS_ASCII,
  49         JIS_KANJI,
  50         JIS_HWKANA,
  51         JIS_AUXKANJI
  52 } JISState;
  53
  54 #define SUBST_CHAR      '_'
  55 #define ESC             '\033'
  56
  57 #define iseuckanji(c) \
  58         (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
  59 #define iseuchwkana1(c) \
  60         (((c) & 0xff) == 0x8e)
  61 #define iseuchwkana2(c) \
  62         (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
  63 #define iseucaux(c) \
  64         (((c) & 0xff) == 0x8f)
  65 #define issjiskanji1(c) \
  66         ((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
  67          (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xfc))
  68 #define issjiskanji2(c) \
  69         ((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) || \
  70          (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
  71 #define issjishwkana(c) \
  72         (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
  73
  74 #define K_IN()                          \
  75         if (state != JIS_KANJI) {       \
  76                 *out++ = ESC;           \
  77                 *out++ = '$';           \
  78                 *out++ = 'B';           \
  79                 state = JIS_KANJI;      \
  80         }
  81
  82 #define K_OUT()                         \
  83         if (state != JIS_ASCII) {       \
  84                 *out++ = ESC;           \
  85                 *out++ = '(';           \
  86                 *out++ = 'B';           \
  87                 state = JIS_ASCII;      \
  88         }
  89
  90 #define HW_IN()                         \
  91         if (state != JIS_HWKANA) {      \
  92                 *out++ = ESC;           \
  93                 *out++ = '(';           \
  94                 *out++ = 'I';           \
  95                 state = JIS_HWKANA;     \
  96         }
  97
  98 #define AUX_IN()                        \
  99         if (state != JIS_AUXKANJI) {    \
 100                 *out++ = ESC;           \
 101                 *out++ = '$';           \
 102                 *out++ = '(';           \
 103                 *out++ = 'D';           \
 104                 state = JIS_AUXKANJI;   \
 105         }
 106
 107 void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
 108 {
 109         const guchar *in = inbuf;
 110         guchar *out = outbuf;
 111         JISState state = JIS_ASCII;
 112
 113         while (*in != '\0') {
 114                 if (*in == ESC) {
 115                         in++;
 116                         if (*in == '$') {
 117                                 if (*(in + 1) == '@' || *(in + 1) == 'B') {
 118                                         state = JIS_KANJI;
 119                                         in += 2;
 120                                 } else if (*(in + 1) == '(' &&
 121                                            *(in + 2) == 'D') {
 122                                         state = JIS_AUXKANJI;
 123                                         in += 3;
 124                                 } else {
 125                                         /* unknown escape sequence */
 126                                         state = JIS_ASCII;
 127                                 }
 128                         } else if (*in == '(') {
 129                                 if (*(in + 1) == 'B' || *(in + 1) == 'J') {
 130                                         state = JIS_ASCII;
 131                                         in += 2;
 132                                 } else if (*(in + 1) == 'I') {
 133                                         state = JIS_HWKANA;
 134                                         in += 2;
 135                                 } else {
 136                                         /* unknown escape sequence */
 137                                         state = JIS_ASCII;
 138                                 }
 139                         } else {
 140                                 /* unknown escape sequence */
 141                                 state = JIS_ASCII;
 142                         }
 143                 } else if (*in == 0x0e) {
 144                         state = JIS_HWKANA;
 145                         in++;
 146                 } else if (*in == 0x0f) {
 147                         state = JIS_ASCII;
 148                         in++;
 149                 } else {
 150                         switch (state) {
 151                         case JIS_ASCII:
 152                                 *out++ = *in++;
 153                                 break;
 154                         case JIS_KANJI:
 155                                 *out++ = *in++ | 0x80;
 156                                 if (*in == '\0') break;
 157                                 *out++ = *in++ | 0x80;
 158                                 break;
 159                         case JIS_HWKANA:
 160                                 *out++ = 0x8e;
 161                                 *out++ = *in++ | 0x80;
 162                                 break;
 163                         case JIS_AUXKANJI:
 164                                 *out++ = 0x8f;
 165                                 *out++ = *in++ | 0x80;
 166                                 if (*in == '\0') break;
 167                                 *out++ = *in++ | 0x80;
 168                                 break;
 169                         }
 170                 }
 171         }
 172
 173         *out = '\0';
 174 }
 175
 176 #define JIS_HWDAKUTEN           0x5e
 177 #define JIS_HWHANDAKUTEN        0x5f
 178
 179 static gint conv_jis_hantozen(guchar *outbuf, guchar jis_code, guchar sound_sym)
 180 {
 181         static guint16 h2z_tbl[] = {
 182                 /* 0x20 - 0x2f */
 183                 0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
 184                 0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
 185                 /* 0x30 - 0x3f */
 186                 0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
 187                 0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
 188                 /* 0x40 - 0x4f */
 189                 0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
 190                 0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
 191                 /* 0x50 - 0x5f */
 192                 0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
 193                 0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
 194         };
 195
 196         static guint16 dakuten_tbl[] = {
 197                 /* 0x30 - 0x3f */
 198                 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x252c, 0x252e,
 199                 0x2530, 0x2532, 0x2534, 0x2536, 0x2538, 0x253a, 0x253c, 0x253e,
 200                 /* 0x40 - 0x4f */
 201                 0x2540, 0x2542, 0x2545, 0x2547, 0x2549, 0x0000, 0x0000, 0x0000,
 202                 0x0000, 0x0000, 0x2550, 0x2553, 0x2556, 0x2559, 0x255c, 0x0000
 203         };
 204
 205         static guint16 handakuten_tbl[] = {
 206                 /* 0x4a - 0x4e */
 207                 0x2551, 0x2554, 0x2557, 0x255a, 0x255d
 208         };
 209
 210         guint16 out_code;
 211
 212         jis_code &= 0x7f;
 213         sound_sym &= 0x7f;
 214
 215         if (jis_code < 0x21 || jis_code > 0x5f)
 216                 return 0;
 217
 218         if (sound_sym == JIS_HWDAKUTEN &&
 219             jis_code >= 0x36 && jis_code <= 0x4e) {
 220                 out_code = dakuten_tbl[jis_code - 0x30];
 221                 if (out_code != 0) {
 222                         *outbuf = out_code >> 8;
 223                         *(outbuf + 1) = out_code & 0xff;
 224                         return 2;
 225                 }
 226         }
 227
 228         if (sound_sym == JIS_HWHANDAKUTEN &&
 229             jis_code >= 0x4a && jis_code <= 0x4e) {
 230                 out_code = handakuten_tbl[jis_code - 0x4a];
 231                 *outbuf = out_code >> 8;
 232                 *(outbuf + 1) = out_code & 0xff;
 233                 return 2;
 234         }
 235
 236         out_code = h2z_tbl[jis_code - 0x20];
 237         *outbuf = out_code >> 8;
 238         *(outbuf + 1) = out_code & 0xff;
 239         return 1;
 240 }
 241
 242 void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
 243 {
 244         const guchar *in = inbuf;
 245         guchar *out = outbuf;
 246         JISState state = JIS_ASCII;
 247
 248         while (*in != '\0') {
 249                 if (isascii(*in)) {
 250                         K_OUT();
 251                         *out++ = *in++;
 252                 } else if (iseuckanji(*in)) {
 253                         if (iseuckanji(*(in + 1))) {
 254                                 K_IN();
 255                                 *out++ = *in++ & 0x7f;
 256                                 *out++ = *in++ & 0x7f;
 257                         } else {
 258                                 K_OUT();
 259                                 *out++ = SUBST_CHAR;
 260                                 in++;
 261                                 if (*in != '\0' && !isascii(*in)) {
 262                                         *out++ = SUBST_CHAR;
 263                                         in++;
 264                                 }
 265                         }
 266                 } else if (iseuchwkana1(*in)) {
 267                         if (iseuchwkana2(*(in + 1))) {
 268                                 if (prefs_common.allow_jisx0201_kana) {
 269                                         HW_IN();
 270                                         in++;
 271                                         *out++ = *in++ & 0x7f;
 272                                 } else {
 273                                         guchar jis_ch[2];
 274                                         gint len;
 275
 276                                         if (iseuchwkana1(*(in + 2)) &&
 277                                             iseuchwkana2(*(in + 3)))
 278                                                 len = conv_jis_hantozen
 279                                                         (jis_ch,
 280                                                          *(in + 1), *(in + 3));
 281                                         else
 282                                                 len = conv_jis_hantozen
 283                                                         (jis_ch,
 284                                                          *(in + 1), '\0');
 285                                         if (len == 0)
 286                                                 in += 2;
 287                                         else {
 288                                                 K_IN();
 289                                                 in += len * 2;
 290                                                 *out++ = jis_ch[0];
 291                                                 *out++ = jis_ch[1];
 292                                         }
 293                                 }
 294                         } else {
 295                                 K_OUT();
 296                                 in++;
 297                                 if (*in != '\0' && !isascii(*in)) {
 298                                         *out++ = SUBST_CHAR;
 299                                         in++;
 300                                 }
 301                         }
 302                 } else if (iseucaux(*in)) {
 303                         in++;
 304                         if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
 305                                 AUX_IN();
 306                                 *out++ = *in++ & 0x7f;
 307                                 *out++ = *in++ & 0x7f;
 308                         } else {
 309                                 K_OUT();
 310                                 if (*in != '\0' && !isascii(*in)) {
 311                                         *out++ = SUBST_CHAR;
 312                                         in++;
 313                                         if (*in != '\0' && !isascii(*in)) {
 314                                                 *out++ = SUBST_CHAR;
 315                                                 in++;
 316                                         }
 317                                 }
 318                         }
 319                 } else {
 320                         K_OUT();
 321                         *out++ = SUBST_CHAR;
 322                         in++;
 323                 }
 324         }
 325
 326         K_OUT();
 327         *out = '\0';
 328 }
 329
 330 void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
 331 {
 332         const guchar *in = inbuf;
 333         guchar *out = outbuf;
 334
 335         while (*in != '\0') {
 336                 if (isascii(*in)) {
 337                         *out++ = *in++;
 338                 } else if (issjiskanji1(*in)) {
 339                         if (issjiskanji2(*(in + 1))) {
 340                                 guchar out1 = *in;
 341                                 guchar out2 = *(in + 1);
 342                                 guchar row;
 343
 344                                 row = out1 < 0xa0 ? 0x70 : 0xb0;
 345                                 if (out2 < 0x9f) {
 346                                         out1 = (out1 - row) * 2 - 1;
 347                                         out2 -= out2 > 0x7f ? 0x20 : 0x1f;
 348                                 } else {
 349                                         out1 = (out1 - row) * 2;
 350                                         out2 -= 0x7e;
 351                                 }
 352
 353                                 *out++ = out1 | 0x80;
 354                                 *out++ = out2 | 0x80;
 355                                 in += 2;
 356                         } else {
 357                                 *out++ = SUBST_CHAR;
 358                                 in++;
 359                                 if (*in != '\0' && !isascii(*in)) {
 360                                         *out++ = SUBST_CHAR;
 361                                         in++;
 362                                 }
 363                         }
 364                 } else if (issjishwkana(*in)) {
 365                         *out++ = 0x8e;
 366                         *out++ = *in++;
 367                 } else {
 368                         *out++ = SUBST_CHAR;
 369                         in++;
 370                 }
 371         }
 372
 373         *out = '\0';
 374 }
 375
 376 void conv_anytoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
 377 {
 378         switch (conv_guess_ja_encoding(inbuf)) {
 379         case C_ISO_2022_JP:
 380                 conv_jistoeuc(outbuf, outlen, inbuf);
 381                 break;
 382         case C_SHIFT_JIS:
 383                 conv_sjistoeuc(outbuf, outlen, inbuf);
 384                 break;
 385         default:
 386                 strncpy2(outbuf, inbuf, outlen);
 387                 break;
 388         }
 389 }
 390
 391 void conv_anytoutf8(gchar *outbuf, gint outlen, const gchar *inbuf)
 392 {
 393         gchar *tmpstr = NULL;
 394
 395         switch (conv_guess_ja_encoding(inbuf)) {
 396         case C_ISO_2022_JP:
 397                 tmpstr = conv_codeset_strdup(inbuf, CS_ISO_2022_JP, CS_UTF_8);
 398                 strncpy2(outbuf, tmpstr, outlen);
 399                 g_free(tmpstr);
 400                 break;
 401         case C_SHIFT_JIS:
 402                 tmpstr = conv_codeset_strdup(inbuf, CS_SHIFT_JIS, CS_UTF_8);
 403                 strncpy2(outbuf, tmpstr, outlen);
 404                 g_free(tmpstr);
 405                 break;
 406         case C_EUC_JP:
 407                 tmpstr = conv_codeset_strdup(inbuf, CS_EUC_JP, CS_UTF_8);
 408                 strncpy2(outbuf, tmpstr, outlen);
 409                 g_free(tmpstr);
 410                 break;
 411         default:
 412                 strncpy2(outbuf, inbuf, outlen);
 413                 break;
 414         }
 415 }
 416
 417 void conv_anytojis(gchar *outbuf, gint outlen, const gchar *inbuf)
 418 {
 419         switch (conv_guess_ja_encoding(inbuf)) {
 420         case C_EUC_JP:
 421                 conv_euctojis(outbuf, outlen, inbuf);
 422                 break;
 423         default:
 424                 strncpy2(outbuf, inbuf, outlen);
 425                 break;
 426         }
 427 }
 428
 429 static gchar valid_eucjp_tbl[][96] = {
 430         /* 0xa2a0 - 0xa2ff */
 431         { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 0,
 432           0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 1, 1, 1, 1, 1, 1,
 433           1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 1, 1, 1, 1, 1, 1,
 434           1, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 1, 1, 1, 1,
 435           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0,
 436           0, 0, 1, 1, 1, 1, 1, 1,  1, 1, 0, 0, 0, 0, 1, 0 },
 437
 438         /* 0xa3a0 - 0xa3ff */
 439         { 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
 440           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 0, 0, 0, 0, 0, 0,
 441           0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 442           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0,
 443           0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 444           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0 },
 445
 446         /* 0xa4a0 - 0xa4ff */
 447         { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 448           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 449           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 450           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 451           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 452           1, 1, 1, 1, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },
 453
 454         /* 0xa5a0 - 0xa5ff */
 455         { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 456           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 457           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 458           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 459           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 460           1, 1, 1, 1, 1, 1, 1, 0,  0, 0, 0, 0, 0, 0, 0, 0 },
 461
 462         /* 0xa6a0 - 0xa6ff */
 463         { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 464           1, 1, 1, 1, 1, 1, 1, 1,  1, 0, 0, 0, 0, 0, 0, 0,
 465           0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 466           1, 1, 1, 1, 1, 1, 1, 1,  1, 0, 0, 0, 0, 0, 0, 0,
 467           0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
 468           0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },
 469
 470         /* 0xa7a0 - 0xa7ff */
 471         { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 472           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 473           1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
 474           0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 475           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 476           1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },
 477
 478         /* 0xa8a0 - 0xa8ff */
 479         { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 480           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 481           1, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
 482           0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
 483           0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
 484           0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 }
 485 };
 486
 487 static gboolean isprintableeuckanji(guchar c1, guchar c2)
 488 {
 489         if (c1 <= 0xa0 || c1 >= 0xf5)
 490                 return FALSE;
 491         if (c2 <= 0xa0 || c2 == 0xff)
 492                 return FALSE;
 493
 494         if (c1 >= 0xa9 && c1 <= 0xaf)
 495                 return FALSE;
 496
 497         if (c1 >= 0xa2 && c1 <= 0xa8)
 498                 return (gboolean)valid_eucjp_tbl[c1 - 0xa2][c2 - 0xa0];
 499
 500         if (c1 == 0xcf) {
 501                 if (c2 >= 0xd4 && c2 <= 0xfe)
 502                         return FALSE;
 503         } else if (c1 == 0xf4) {
 504                 if (c2 >= 0xa7 && c2 <= 0xfe)
 505                         return FALSE;
 506         }
 507
 508         return TRUE;
 509 }
 510
 511 void conv_unreadable_eucjp(gchar *str)
 512 {
 513         register guchar *p = str;
 514
 515         while (*p != '\0') {
 516                 if (isascii(*p)) {
 517                         /* convert CR+LF -> LF */
 518                         if (*p == '\r' && *(p + 1) == '\n')
 519                                 memmove(p, p + 1, strlen(p));
 520                         /* printable 7 bit code */
 521                         p++;
 522                 } else if (iseuckanji(*p)) {
 523                         if (isprintableeuckanji(*p, *(p + 1))) {
 524                                 /* printable euc-jp code */
 525                                 p += 2;
 526                         } else {
 527                                 /* substitute unprintable code */
 528                                 *p++ = SUBST_CHAR;
 529                                 if (*p != '\0') {
 530                                         if (isascii(*p))
 531                                                 p++;
 532                                         else
 533                                                 *p++ = SUBST_CHAR;
 534                                 }
 535                         }
 536                 } else if (iseuchwkana1(*p)) {
 537                         if (iseuchwkana2(*(p + 1)))
 538                                 /* euc-jp hankaku kana */
 539                                 p += 2;
 540                         else
 541                                 *p++ = SUBST_CHAR;
 542                 } else if (iseucaux(*p)) {
 543                         if (iseuckanji(*(p + 1)) && iseuckanji(*(p + 2))) {
 544                                 /* auxiliary kanji */
 545                                 p += 3;
 546                         } else
 547                                 *p++ = SUBST_CHAR;
 548                 } else
 549                         /* substitute unprintable 1 byte code */
 550                         *p++ = SUBST_CHAR;
 551         }
 552 }
 553
 554 void conv_unreadable_8bit(gchar *str)
 555 {
 556         register guchar *p = str;
 557
 558         while (*p != '\0') {
 559                 /* convert CR+LF -> LF */
 560                 if (*p == '\r' && *(p + 1) == '\n')
 561                         memmove(p, p + 1, strlen(p));
 562                 else if (!isascii(*p)) *p = SUBST_CHAR;
 563                 p++;
 564         }
 565 }
 566
 567 void conv_unreadable_latin(gchar *str)
 568 {
 569         register guchar *p = str;
 570
 571         while (*p != '\0') {
 572                 /* convert CR+LF -> LF */
 573                 if (*p == '\r' && *(p + 1) == '\n')
 574                         memmove(p, p + 1, strlen(p));
 575                 else if ((*p & 0xff) >= 0x7f && (*p & 0xff) <= 0x9f)
 576                         *p = SUBST_CHAR;
 577                 p++;
 578         }
 579 }
 580
 581 void conv_unreadable_locale(gchar *str)
 582 {
 583         switch (conv_get_current_charset()) {
 584         case C_US_ASCII:
 585         case C_ISO_8859_1:
 586         case C_ISO_8859_2:
 587         case C_ISO_8859_3:
 588         case C_ISO_8859_4:
 589         case C_ISO_8859_5:
 590         case C_ISO_8859_6:
 591         case C_ISO_8859_7:
 592         case C_ISO_8859_8:
 593         case C_ISO_8859_9:
 594         case C_ISO_8859_10:
 595         case C_ISO_8859_11:
 596         case C_ISO_8859_13:
 597         case C_ISO_8859_14:
 598         case C_ISO_8859_15:
 599                 conv_unreadable_latin(str);
 600                 break;
 601         case C_EUC_JP:
 602                 conv_unreadable_eucjp(str);
 603                 break;
 604         default:
 605                 break;
 606         }
 607 }
 608
 609 #define NCV     '\0'
 610
 611 void conv_mb_alnum(gchar *str)
 612 {
 613         static guchar char_tbl[] = {
 614                 /* 0xa0 - 0xaf */
 615                 NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
 616                 ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
 617                 /* 0xb0 - 0xbf */
 618                 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
 619                 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
 620                 /* 0xc0 - 0xcf */
 621                 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
 622                 NCV, NCV, '(', ')', NCV, NCV, '[', ']',
 623                 /* 0xd0 - 0xdf */
 624                 '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
 625                 NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
 626                 /* 0xe0 - 0xef */
 627                 NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
 628                 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
 629         };
 630
 631         register guchar *p = str;
 632         register gint len;
 633
 634         len = strlen(str);
 635
 636         while (len > 1) {
 637                 if (*p == 0xa3) {
 638                         register guchar ch = *(p + 1);
 639
 640                         if (ch >= 0xb0 && ch <= 0xfa) {
 641                                 /* [a-zA-Z] */
 642                                 *p = ch & 0x7f;
 643                                 p++;
 644                                 len--;
 645                                 memmove(p, p + 1, len);
 646                                 len--;
 647                         } else  {
 648                                 p += 2;
 649                                 len -= 2;
 650                         }
 651                 } else if (*p == 0xa1) {
 652                         register guchar ch = *(p + 1);
 653
 654                         if (ch >= 0xa0 && ch <= 0xef &&
 655                             NCV != char_tbl[ch - 0xa0]) {
 656                                 *p = char_tbl[ch - 0xa0];
 657                                 p++;
 658                                 len--;
 659                                 memmove(p, p + 1, len);
 660                                 len--;
 661                         } else {
 662                                 p += 2;
 663                                 len -= 2;
 664                         }
 665                 } else if (iseuckanji(*p)) {
 666                         p += 2;
 667                         len -= 2;
 668                 } else {
 669                         p++;
 670                         len--;
 671                 }
 672         }
 673 }
 674
 675 CharSet conv_guess_ja_encoding(const gchar *str)
 676 {
 677         const guchar *p = str;
 678         CharSet guessed = C_US_ASCII;
 679
 680         while (*p != '\0') {
 681                 if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
 682                         if (guessed == C_US_ASCII)
 683                                 return C_ISO_2022_JP;
 684                         p += 2;
 685                 } else if (isascii(*p)) {
 686                         p++;
 687                 } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
 688                         if (*p >= 0xfd && *p <= 0xfe)
 689                                 return C_EUC_JP;
 690                         else if (guessed == C_SHIFT_JIS) {
 691                                 if ((issjiskanji1(*p) &&
 692                                      issjiskanji2(*(p + 1))) ||
 693                                     issjishwkana(*p))
 694                                         guessed = C_SHIFT_JIS;
 695                                 else
 696                                         guessed = C_EUC_JP;
 697                         } else
 698                                 guessed = C_EUC_JP;
 699                         p += 2;
 700                 } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
 701                         if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
 702                                 guessed = C_SHIFT_JIS;
 703                         else
 704                                 return C_SHIFT_JIS;
 705                         p += 2;
 706                 } else if (issjishwkana(*p)) {
 707                         guessed = C_SHIFT_JIS;
 708                         p++;
 709                 } else {
 710                         p++;
 711                 }
 712         }
 713
 714         return guessed;
 715 }
 716
 717 void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 718 {
 719         conv_jistoeuc(outbuf, outlen, inbuf);
 720         conv_unreadable_eucjp(outbuf);
 721 }
 722
 723 void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 724 {
 725         conv_sjistoeuc(outbuf, outlen, inbuf);
 726         conv_unreadable_eucjp(outbuf);
 727 }
 728
 729 void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 730 {
 731         strncpy2(outbuf, inbuf, outlen);
 732         conv_unreadable_eucjp(outbuf);
 733 }
 734
 735 #warning FIXME_GTK2
 736 #if 0
 737 void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 738 {
 739         conv_anytoeuc(outbuf, outlen, inbuf);
 740         conv_unreadable_eucjp(outbuf);
 741 }
 742 #else
 743 void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 744 {
 745         conv_anytoutf8(outbuf, outlen, inbuf);
 746 }
 747 #endif
 748
 749 void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 750 {
 751         strncpy2(outbuf, inbuf, outlen);
 752         conv_unreadable_8bit(outbuf);
 753 }
 754
 755 void conv_latintodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 756 {
 757         strncpy2(outbuf, inbuf, outlen);
 758         conv_unreadable_latin(outbuf);
 759 }
 760
 761 void conv_localetodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 762 {
 763         strncpy2(outbuf, inbuf, outlen);
 764         conv_unreadable_locale(outbuf);
 765 }
 766
 767 void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
 768 {
 769         strncpy2(outbuf, inbuf, outlen);
 770 }
 771
 772 CodeConverter *conv_code_converter_new(const gchar *charset)
 773 {
 774         CodeConverter *conv;
 775
 776         conv = g_new0(CodeConverter, 1);
 777 #warning FIXME_GTK2
 778         conv->code_conv_func = conv_get_code_conv_func(charset, CS_UTF_8);
 779         conv->charset_str = g_strdup(charset);
 780         conv->charset = conv_get_charset_from_str(charset);
 781
 782         return conv;
 783 }
 784
 785 void conv_code_converter_destroy(CodeConverter *conv)
 786 {
 787         g_free(conv->charset_str);
 788         g_free(conv);
 789 }
 790
 791 gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
 792                   const gchar *inbuf)
 793 {
 794 #if HAVE_ICONV
 795         if (conv->code_conv_func != conv_noconv)
 796                 conv->code_conv_func(outbuf, outlen, inbuf);
 797         else {
 798                 gchar *str;
 799
 800 #warning FIXME_GTK2
 801                 str = conv_iconv_strdup(inbuf, conv->charset_str, CS_UTF_8);
 802                 if (!str)
 803                         return -1;
 804                 else {
 805                         strncpy2(outbuf, str, outlen);
 806                         g_free(str);
 807                 }
 808         }
 809 #else /* !HAVE_ICONV */
 810         conv->code_conv_func(outbuf, outlen, inbuf);
 811 #endif
 812
 813         return 0;
 814 }
 815
 816 gchar *conv_codeset_strdup(const gchar *inbuf,
 817                            const gchar *src_code, const gchar *dest_code)
 818 {
 819         gchar *buf;
 820         size_t len;
 821         CodeConvFunc conv_func;
 822
 823         conv_func = conv_get_code_conv_func(src_code, dest_code);
 824         if (conv_func != conv_noconv) {
 825                 len = (strlen(inbuf) + 1) * 3;
 826                 buf = g_malloc(len);
 827                 if (!buf) return NULL;
 828
 829                 conv_func(buf, len, inbuf);
 830                 return g_realloc(buf, strlen(buf) + 1);
 831         }
 832
 833 #if HAVE_ICONV
 834         return conv_iconv_strdup(inbuf, src_code, dest_code);
 835 #else
 836         return g_strdup(inbuf);
 837 #endif /* HAVE_ICONV */
 838 }
 839
/*
 * Select the built-in converter function for a source/destination charset
 * pair.
 *
 * src_charset_str:  source charset name, or NULL to use the charset
 *                   reported by conv_get_current_charset().
 * dest_charset_str: destination charset name; NULL is resolved to C_AUTO
 *                   by conv_get_charset_from_str().
 *
 * Returns one of the conv_*() functions of this file.  conv_noconv is
 * returned whenever no built-in converter applies; conv_convert() and
 * conv_codeset_strdup() treat that value as "use iconv instead" when
 * HAVE_ICONV is set.
 */
CodeConvFunc conv_get_code_conv_func(const gchar *src_charset_str,
                                     const gchar *dest_charset_str)
{
        CodeConvFunc code_conv = conv_noconv;
        CharSet src_charset;
        CharSet dest_charset;

        if (!src_charset_str)
                src_charset = conv_get_current_charset();
        else
                src_charset = conv_get_charset_from_str(src_charset_str);

        /* auto detection mode */
        if (!src_charset_str && !dest_charset_str) {
                if (src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS)
                        return conv_anytodisp;
                else
                        return conv_noconv;
        }

        dest_charset = conv_get_charset_from_str(dest_charset_str);

        /* These destinations are decided without looking at the source:
           US-ASCII always gets conv_ustodisp, and UTF-8 (explicit, or
           automatic under a UTF-8 locale) never has a built-in converter. */
        if (dest_charset == C_US_ASCII)
                return conv_ustodisp;
        else if (dest_charset == C_UTF_8 ||
                 (dest_charset == C_AUTO &&
                  conv_get_current_charset() == C_UTF_8))
                return conv_noconv;

        /* For every source below, a C_AUTO destination selects a *todisp
           converter and an explicit destination selects a plain
           converter; any other combination keeps conv_noconv. */
        switch (src_charset) {
        case C_ISO_2022_JP:
        case C_ISO_2022_JP_2:
        case C_ISO_2022_JP_3:
                if (dest_charset == C_AUTO &&
                    conv_get_current_charset() == C_EUC_JP)
                        code_conv = conv_jistodisp;
                else if (dest_charset == C_EUC_JP)
                        code_conv = conv_jistoeuc;
                break;
        case C_US_ASCII:
                if (dest_charset == C_AUTO)
                        code_conv = conv_ustodisp;
                break;
        case C_ISO_8859_1:
        case C_ISO_8859_2:
        case C_ISO_8859_3:
        case C_ISO_8859_4:
        case C_ISO_8859_5:
        case C_ISO_8859_6:
        case C_ISO_8859_7:
        case C_ISO_8859_8:
        case C_ISO_8859_9:
        case C_ISO_8859_10:
        case C_ISO_8859_11:
        case C_ISO_8859_13:
        case C_ISO_8859_14:
        case C_ISO_8859_15:
                /* Used when the locale charset equals the source charset,
                   or when the locale is a multibyte one (MB_CUR_MAX > 1). */
                if (dest_charset == C_AUTO &&
                    (conv_get_current_charset() == src_charset ||
                     MB_CUR_MAX > 1))
                        code_conv = conv_latintodisp;
                break;
        case C_SHIFT_JIS:
                if (dest_charset == C_AUTO &&
                    conv_get_current_charset() == C_EUC_JP)
                        code_conv = conv_sjistodisp;
                else if (dest_charset == C_EUC_JP)
                        code_conv = conv_sjistoeuc;
                break;
        case C_EUC_JP:
                if (dest_charset == C_AUTO &&
                    conv_get_current_charset() == C_EUC_JP)
                        code_conv = conv_euctodisp;
                else if (dest_charset == C_ISO_2022_JP   ||
                         dest_charset == C_ISO_2022_JP_2 ||
                         dest_charset == C_ISO_2022_JP_3)
                        code_conv = conv_euctojis;
                break;
        default:
                break;
        }

        return code_conv;
}
 924
#if HAVE_ICONV
/*
 * Convert inbuf from src_code to dest_code with iconv and return the
 * result as a newly allocated, NUL-terminated string.
 *
 * inbuf:     NUL-terminated input text (must not be NULL).
 * src_code:  source charset name; NULL selects
 *            conv_get_outgoing_charset_str().
 * dest_code: destination charset name; NULL selects
 *            conv_get_current_charset_str().
 *
 * A plain copy of inbuf is returned when the destination is US-ASCII or
 * when both names compare equal ignoring case.  Returns NULL if
 * iconv_open() does not support the pair.  Invalid input bytes are
 * replaced by SUBST_CHAR.  The caller frees the result with g_free().
 */
gchar *conv_iconv_strdup(const gchar *inbuf,
                         const gchar *src_code, const gchar *dest_code)
{
        iconv_t cd;
        const gchar *inbuf_p;
        gchar *outbuf;
        gchar *outbuf_p;
        size_t in_size;
        size_t in_left;
        size_t out_size;
        size_t out_left;
        size_t n_conv;
        size_t len;

        if (!src_code)
                src_code = conv_get_outgoing_charset_str();
        if (!dest_code)
                dest_code = conv_get_current_charset_str();

        /* don't convert if current codeset is US-ASCII */
        if (!strcasecmp(dest_code, CS_US_ASCII))
                return g_strdup(inbuf);

        /* don't convert if src and dest codeset are identical */
        if (!strcasecmp(src_code, dest_code))
                return g_strdup(inbuf);

        cd = iconv_open(dest_code, src_code);
        if (cd == (iconv_t)-1)
                return NULL;

        /* start with an output buffer twice the size of the input */
        inbuf_p = inbuf;
        in_size = strlen(inbuf);
        in_left = in_size;
        out_size = (in_size + 1) * 2;
        outbuf = g_malloc(out_size);
        outbuf_p = outbuf;
        out_left = out_size;

/* Double the output buffer, keeping the current write offset; outbuf_p
   and out_left are recomputed because g_realloc() may move the block. */
#define EXPAND_BUF()                            \
{                                               \
        len = outbuf_p - outbuf;                \
        out_size *= 2;                          \
        outbuf = g_realloc(outbuf, out_size);   \
        outbuf_p = outbuf + len;                \
        out_left = out_size - len;              \
}

        /* iconv() advances the in/out pointers as it goes, so after each
           failure the call is simply repeated from where it stopped. */
        while ((n_conv = iconv(cd, (ICONV_CONST gchar **)&inbuf_p, &in_left,
                               &outbuf_p, &out_left)) == (size_t)-1) {
                if (EILSEQ == errno) {
                        /* invalid input sequence: skip one input byte and
                           emit the substitution character in its place */
                        inbuf_p++;
                        in_left--;
                        if (out_left == 0) {
                                EXPAND_BUF();
                        }
                        *outbuf_p++ = SUBST_CHAR;
                        out_left--;
                } else if (EINVAL == errno) {
                        /* incomplete multibyte sequence at the end of the
                           input: keep what has been converted so far */
                        break;
                } else if (E2BIG == errno) {
                        /* output buffer full: grow it and retry */
                        EXPAND_BUF();
                } else {
                        g_warning("conv_iconv_strdup(): %s\n",
                                  g_strerror(errno));
                        break;
                }
        }

        /* A call with a NULL input asks iconv() to write whatever is
           needed to return the output to its initial shift state. */
        while ((n_conv = iconv(cd, NULL, NULL, &outbuf_p, &out_left)) ==
               (size_t)-1) {
                if (E2BIG == errno) {
                        EXPAND_BUF();
                } else {
                        g_warning("conv_iconv_strdup(): %s\n",
                                  g_strerror(errno));
                        break;
                }
        }

#undef EXPAND_BUF

        /* resize to the exact length and add the terminator */
        len = outbuf_p - outbuf;
        outbuf = g_realloc(outbuf, len + 1);
        outbuf[len] = '\0';

        iconv_close(cd);

        return outbuf;
}
#endif /* HAVE_ICONV */
1017
/*
 * Mapping between CharSet values and charset name strings.
 *
 * The table backs both lookup directions:
 *  - name -> CharSet (conv_get_charset_from_str_table()) inserts every
 *    row, so each name listed here is recognised;
 *  - CharSet -> name (conv_get_charset_to_str_table()) keeps only the
 *    first row of each CharSet, so for values that appear more than once
 *    (C_US_ASCII, C_EUC_JP, C_SHIFT_JIS) the first name listed is the one
 *    reported and the later rows act as aliases.
 */
static const struct {
        CharSet charset;
        gchar *const name;
} charsets[] = {
        {C_US_ASCII,            CS_US_ASCII},
        {C_US_ASCII,            CS_ANSI_X3_4_1968},
        {C_UTF_8,               CS_UTF_8},
        {C_UTF_7,               CS_UTF_7},
        {C_ISO_8859_1,          CS_ISO_8859_1},
        {C_ISO_8859_2,          CS_ISO_8859_2},
        {C_ISO_8859_3,          CS_ISO_8859_3},
        {C_ISO_8859_4,          CS_ISO_8859_4},
        {C_ISO_8859_5,          CS_ISO_8859_5},
        {C_ISO_8859_6,          CS_ISO_8859_6},
        {C_ISO_8859_7,          CS_ISO_8859_7},
        {C_ISO_8859_8,          CS_ISO_8859_8},
        {C_ISO_8859_9,          CS_ISO_8859_9},
        {C_ISO_8859_10,         CS_ISO_8859_10},
        {C_ISO_8859_11,         CS_ISO_8859_11},
        {C_ISO_8859_13,         CS_ISO_8859_13},
        {C_ISO_8859_14,         CS_ISO_8859_14},
        {C_ISO_8859_15,         CS_ISO_8859_15},
        {C_BALTIC,              CS_BALTIC},
        {C_CP1250,              CS_CP1250},
        {C_CP1251,              CS_CP1251},
        {C_CP1252,              CS_CP1252},
        {C_CP1253,              CS_CP1253},
        {C_CP1254,              CS_CP1254},
        {C_CP1255,              CS_CP1255},
        {C_CP1256,              CS_CP1256},
        {C_CP1257,              CS_CP1257},
        {C_CP1258,              CS_CP1258},
        {C_WINDOWS_1250,        CS_WINDOWS_1250},
        {C_WINDOWS_1251,        CS_WINDOWS_1251},
        {C_WINDOWS_1252,        CS_WINDOWS_1252},
        {C_WINDOWS_1253,        CS_WINDOWS_1253},
        {C_WINDOWS_1254,        CS_WINDOWS_1254},
        {C_WINDOWS_1255,        CS_WINDOWS_1255},
        {C_WINDOWS_1256,        CS_WINDOWS_1256},
        {C_WINDOWS_1257,        CS_WINDOWS_1257},
        {C_WINDOWS_1258,        CS_WINDOWS_1258},
        {C_KOI8_R,              CS_KOI8_R},
        {C_KOI8_T,              CS_KOI8_T},
        {C_KOI8_U,              CS_KOI8_U},
        {C_ISO_2022_JP,         CS_ISO_2022_JP},
        {C_ISO_2022_JP_2,       CS_ISO_2022_JP_2},
        {C_ISO_2022_JP_3,       CS_ISO_2022_JP_3},
        {C_EUC_JP,              CS_EUC_JP},
        {C_EUC_JP,              CS_EUCJP},
        {C_SHIFT_JIS,           CS_SHIFT_JIS},
        {C_SHIFT_JIS,           CS_SHIFT__JIS},
        {C_SHIFT_JIS,           CS_SJIS},
        {C_ISO_2022_KR,         CS_ISO_2022_KR},
        {C_EUC_KR,              CS_EUC_KR},
        {C_ISO_2022_CN,         CS_ISO_2022_CN},
        {C_EUC_CN,              CS_EUC_CN},
        {C_GB2312,              CS_GB2312},
        {C_GBK,                 CS_GBK},
        {C_EUC_TW,              CS_EUC_TW},
        {C_BIG5,                CS_BIG5},
        {C_BIG5_HKSCS,          CS_BIG5_HKSCS},
        {C_TIS_620,             CS_TIS_620},
        {C_WINDOWS_874,         CS_WINDOWS_874},
        {C_GEORGIAN_PS,         CS_GEORGIAN_PS},
        {C_TCVN5712_1,          CS_TCVN5712_1},
};
1084
/*
 * Locale name -> charset table.
 *
 *   locale:      locale name (or name prefix) to match.
 *   charset:     value returned by conv_get_current_charset().
 *   out_charset: value returned by conv_get_outgoing_charset().
 *
 * Both lookup functions scan the rows in order and stop at the first row
 * whose locale string is a case-insensitive prefix of the current locale,
 * so a more specific name (e.g. "ja_JP.SJIS") must come before its
 * shorter fallback (e.g. "ja_JP").  A row without a '.' after the '_'
 * also matches a bare two-letter locale such as "ja".
 */
static const struct {
        gchar *const locale;
        CharSet charset;
        CharSet out_charset;
} locale_table[] = {
        {"ja_JP.eucJP"  , C_EUC_JP      , C_ISO_2022_JP},
        {"ja_JP.EUC-JP" , C_EUC_JP      , C_ISO_2022_JP},
        {"ja_JP.EUC"    , C_EUC_JP      , C_ISO_2022_JP},
        {"ja_JP.ujis"   , C_EUC_JP      , C_ISO_2022_JP},
        {"ja_JP.SJIS"   , C_SHIFT_JIS   , C_ISO_2022_JP},
        {"ja_JP.JIS"    , C_ISO_2022_JP , C_ISO_2022_JP},
        {"ja_JP"        , C_EUC_JP      , C_ISO_2022_JP},
        {"ko_KR.EUC-KR" , C_EUC_KR      , C_EUC_KR},
        {"ko_KR"        , C_EUC_KR      , C_EUC_KR},
        {"zh_CN.GB2312" , C_GB2312      , C_GB2312},
        {"zh_CN.GBK"    , C_GBK         , C_GB2312},
        {"zh_CN"        , C_GB2312      , C_GB2312},
        {"zh_HK"        , C_BIG5_HKSCS  , C_BIG5_HKSCS},
        {"zh_TW.eucTW"  , C_EUC_TW      , C_BIG5},
        {"zh_TW.EUC-TW" , C_EUC_TW      , C_BIG5},
        {"zh_TW.Big5"   , C_BIG5        , C_BIG5},
        {"zh_TW"        , C_BIG5        , C_BIG5},

        {"ru_RU.KOI8-R" , C_KOI8_R      , C_KOI8_R},
        {"ru_RU.KOI8R"  , C_KOI8_R      , C_KOI8_R},
        {"ru_RU.CP1251" , C_WINDOWS_1251, C_KOI8_R},
        {"ru_RU"        , C_ISO_8859_5  , C_KOI8_R},
        {"tg_TJ"        , C_KOI8_T      , C_KOI8_T},
        {"ru_UA"        , C_KOI8_U      , C_KOI8_U},
        {"uk_UA.CP1251" , C_WINDOWS_1251, C_KOI8_U},
        {"uk_UA"        , C_KOI8_U      , C_KOI8_U},

        {"be_BY"        , C_WINDOWS_1251, C_WINDOWS_1251},
        {"bg_BG"        , C_WINDOWS_1251, C_WINDOWS_1251},

        {"yi_US"        , C_WINDOWS_1255, C_WINDOWS_1255},

        {"af_ZA"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"br_FR"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"ca_ES"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"da_DK"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"de_AT"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"de_BE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"de_CH"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"de_DE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"de_LU"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_AU"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_BW"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_CA"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_DK"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_GB"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_HK"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_IE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_NZ"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_PH"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_SG"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_US"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_ZA"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_ZW"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_AR"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_BO"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_CL"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_CO"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_CR"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_DO"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_EC"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_ES"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_GT"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_HN"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_MX"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_NI"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_PA"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_PE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_PR"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_PY"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_SV"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_US"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_UY"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_VE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"et_EE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"eu_ES"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"fi_FI"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"fo_FO"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"fr_BE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"fr_CA"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"fr_CH"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"fr_FR"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"fr_LU"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"ga_IE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"gl_ES"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"gv_GB"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"id_ID"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"is_IS"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"it_CH"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"it_IT"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"kl_GL"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"kw_GB"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"ms_MY"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"nl_BE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"nl_NL"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"nn_NO"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"no_NO"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"oc_FR"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"pt_BR"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"pt_PT"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"sq_AL"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"sv_FI"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"sv_SE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"tl_PH"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"uz_UZ"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"wa_BE"        , C_ISO_8859_1  , C_ISO_8859_1},

        {"bs_BA"        , C_ISO_8859_2  , C_ISO_8859_2},
        {"cs_CZ"        , C_ISO_8859_2  , C_ISO_8859_2},
        {"hr_HR"        , C_ISO_8859_2  , C_ISO_8859_2},
        {"hu_HU"        , C_ISO_8859_2  , C_ISO_8859_2},
        {"pl_PL"        , C_ISO_8859_2  , C_ISO_8859_2},
        {"ro_RO"        , C_ISO_8859_2  , C_ISO_8859_2},
        {"sk_SK"        , C_ISO_8859_2  , C_ISO_8859_2},
        {"sl_SI"        , C_ISO_8859_2  , C_ISO_8859_2},

        {"sr_YU@cyrillic"       , C_ISO_8859_5  , C_ISO_8859_5},
        {"sr_YU"                , C_ISO_8859_2  , C_ISO_8859_2},

        {"mt_MT"                , C_ISO_8859_3  , C_ISO_8859_3},

        {"lt_LT.iso88594"       , C_ISO_8859_4  , C_ISO_8859_4},
        {"lt_LT.ISO8859-4"      , C_ISO_8859_4  , C_ISO_8859_4},
        {"lt_LT.ISO_8859-4"     , C_ISO_8859_4  , C_ISO_8859_4},
        {"lt_LT"                , C_ISO_8859_13 , C_ISO_8859_13},

        {"mk_MK"        , C_ISO_8859_5  , C_ISO_8859_5},

        {"ar_AE"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_BH"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_DZ"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_EG"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_IQ"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_JO"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_KW"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_LB"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_LY"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_MA"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_OM"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_QA"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_SA"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_SD"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_SY"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_TN"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_YE"        , C_ISO_8859_6  , C_ISO_8859_6},

        {"el_GR"        , C_ISO_8859_7  , C_ISO_8859_7},
        {"he_IL"        , C_ISO_8859_8  , C_ISO_8859_8},
        {"iw_IL"        , C_ISO_8859_8  , C_ISO_8859_8},
        {"tr_TR"        , C_ISO_8859_9  , C_ISO_8859_9},

        {"lv_LV"        , C_ISO_8859_13 , C_ISO_8859_13},
        {"mi_NZ"        , C_ISO_8859_13 , C_ISO_8859_13},

        {"cy_GB"        , C_ISO_8859_14 , C_ISO_8859_14},

        {"ar_IN"        , C_UTF_8       , C_UTF_8},
        {"en_IN"        , C_UTF_8       , C_UTF_8},
        {"se_NO"        , C_UTF_8       , C_UTF_8},
        {"ta_IN"        , C_UTF_8       , C_UTF_8},
        {"te_IN"        , C_UTF_8       , C_UTF_8},
        {"ur_PK"        , C_UTF_8       , C_UTF_8},

        {"th_TH"        , C_TIS_620     , C_TIS_620},
        /* {"th_TH"     , C_WINDOWS_874}, */
        /* {"th_TH"     , C_ISO_8859_11}, */

        {"ka_GE"        , C_GEORGIAN_PS , C_GEORGIAN_PS},
        {"vi_VN.TCVN"   , C_TCVN5712_1  , C_TCVN5712_1},

        {"C"                    , C_US_ASCII    , C_US_ASCII},
        {"POSIX"                , C_US_ASCII    , C_US_ASCII},
        {"ANSI_X3.4-1968"       , C_US_ASCII    , C_US_ASCII},
};
1264
1265 static GHashTable *conv_get_charset_to_str_table(void)
1266 {
1267         static GHashTable *table;
1268         gint i;
1269
1270         if (table)
1271                 return table;
1272
1273         table = g_hash_table_new(NULL, g_direct_equal);
1274
1275         for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1276                 if (g_hash_table_lookup(table, GUINT_TO_POINTER(charsets[i].charset))
1277                     == NULL) {
1278                         g_hash_table_insert
1279                                 (table, GUINT_TO_POINTER(charsets[i].charset),
1280                                  charsets[i].name);
1281                 }
1282         }
1283
1284         return table;
1285 }
1286
1287 static GHashTable *conv_get_charset_from_str_table(void)
1288 {
1289         static GHashTable *table;
1290         gint i;
1291
1292         if (table)
1293                 return table;
1294
1295         table = g_hash_table_new(str_case_hash, str_case_equal);
1296
1297         for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1298                 g_hash_table_insert(table, charsets[i].name,
1299                                     GUINT_TO_POINTER(charsets[i].charset));
1300         }
1301
1302         return table;
1303 }
1304
1305 const gchar *conv_get_charset_str(CharSet charset)
1306 {
1307         GHashTable *table;
1308
1309         table = conv_get_charset_to_str_table();
1310         return g_hash_table_lookup(table, GUINT_TO_POINTER(charset));
1311 }
1312
1313 CharSet conv_get_charset_from_str(const gchar *charset)
1314 {
1315         GHashTable *table;
1316
1317         if (!charset) return C_AUTO;
1318
1319         table = conv_get_charset_from_str_table();
1320         return GPOINTER_TO_UINT(g_hash_table_lookup(table, charset));
1321 }
1322
1323 CharSet conv_get_current_charset(void)
1324 {
1325         static CharSet cur_charset = -1;
1326         const gchar *cur_locale;
1327         const gchar *p;
1328         gint i;
1329
1330         if (cur_charset != -1)
1331                 return cur_charset;
1332
1333         cur_locale = conv_get_current_locale();
1334         if (!cur_locale) {
1335                 cur_charset = C_US_ASCII;
1336                 return cur_charset;
1337         }
1338
1339         if (strcasestr(cur_locale, "UTF-8")) {
1340                 cur_charset = C_UTF_8;
1341                 return cur_charset;
1342         }
1343
1344         if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1345                 cur_charset = C_ISO_8859_15;
1346                 return cur_charset;
1347         }
1348
1349         for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1350                 const gchar *p;
1351
1352                 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1353                    "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1354                 if (!strncasecmp(cur_locale, locale_table[i].locale,
1355                                  strlen(locale_table[i].locale))) {
1356                         cur_charset = locale_table[i].charset;
1357                         return cur_charset;
1358                 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1359                          !strchr(p + 1, '.')) {
1360                         if (strlen(cur_locale) == 2 &&
1361                             !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1362                                 cur_charset = locale_table[i].charset;
1363                                 return cur_charset;
1364                         }
1365                 }
1366         }
1367
1368         cur_charset = C_AUTO;
1369         return cur_charset;
1370 }
1371
1372 const gchar *conv_get_current_charset_str(void)
1373 {
1374         static const gchar *codeset = NULL;
1375
1376         if (!codeset)
1377                 codeset = conv_get_charset_str(conv_get_current_charset());
1378
1379         return codeset ? codeset : CS_US_ASCII;
1380 }
1381
1382 CharSet conv_get_outgoing_charset(void)
1383 {
1384         static CharSet out_charset = -1;
1385         const gchar *cur_locale;
1386         const gchar *p;
1387         gint i;
1388
1389         if (out_charset != -1)
1390                 return out_charset;
1391
1392         cur_locale = conv_get_current_locale();
1393         if (!cur_locale) {
1394                 out_charset = C_AUTO;
1395                 return out_charset;
1396         }
1397
1398         if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1399                 out_charset = C_ISO_8859_15;
1400                 return out_charset;
1401         }
1402
1403         for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1404                 const gchar *p;
1405
1406                 if (!strncasecmp(cur_locale, locale_table[i].locale,
1407                                  strlen(locale_table[i].locale))) {
1408                         out_charset = locale_table[i].out_charset;
1409                         break;
1410                 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1411                          !strchr(p + 1, '.')) {
1412                         if (strlen(cur_locale) == 2 &&
1413                             !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1414                                 out_charset = locale_table[i].out_charset;
1415                                 break;
1416                         }
1417                 }
1418         }
1419
1420 #if !HAVE_ICONV
1421         /* encoding conversion without iconv() is only supported
1422            on Japanese locale for now */
1423         if (out_charset == C_ISO_2022_JP)
1424                 return out_charset;
1425         else
1426                 return conv_get_current_charset();
1427 #endif
1428
1429         return out_charset;
1430 }
1431
1432 const gchar *conv_get_outgoing_charset_str(void)
1433 {
1434         CharSet out_charset;
1435         const gchar *str;
1436
1437         if (prefs_common.outgoing_charset) {
1438                 if (!isalpha((guchar)prefs_common.outgoing_charset[0])) {
1439                         g_free(prefs_common.outgoing_charset);
1440                         prefs_common.outgoing_charset = g_strdup(CS_AUTO);
1441                 } else if (strcmp(prefs_common.outgoing_charset, CS_AUTO) != 0)
1442                         return prefs_common.outgoing_charset;
1443         }
1444
1445         out_charset = conv_get_outgoing_charset();
1446         str = conv_get_charset_str(out_charset);
1447
1448         return str ? str : CS_US_ASCII;
1449 }
1450
1451 gboolean conv_is_multibyte_encoding(CharSet encoding)
1452 {
1453         switch (encoding) {
1454         case C_EUC_JP:
1455         case C_EUC_KR:
1456         case C_EUC_TW:
1457         case C_EUC_CN:
1458         case C_ISO_2022_JP:
1459         case C_ISO_2022_JP_2:
1460         case C_ISO_2022_JP_3:
1461         case C_ISO_2022_KR:
1462         case C_ISO_2022_CN:
1463         case C_SHIFT_JIS:
1464         case C_GB2312:
1465         case C_BIG5:
1466         case C_UTF_8:
1467         case C_UTF_7:
1468                 return TRUE;
1469         default:
1470                 return FALSE;
1471         }
1472 }
1473
1474 const gchar *conv_get_current_locale(void)
1475 {
1476         const gchar *cur_locale;
1477
1478         cur_locale = g_getenv("LC_ALL");
1479         if (!cur_locale) cur_locale = g_getenv("LC_CTYPE");
1480         if (!cur_locale) cur_locale = g_getenv("LANG");
1481         if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
1482
1483         debug_print("current locale: %s\n",
1484                     cur_locale ? cur_locale : "(none)");
1485
1486         return cur_locale;
1487 }
1488
1489 void conv_unmime_header_overwrite(gchar *str)
1490 {
1491         gchar *buf;
1492         gint buflen;
1493         CharSet cur_charset;
1494         const gchar *locale;
1495
1496         g_return_if_fail(str != NULL);
1497
1498         cur_charset = conv_get_current_charset();
1499
1500 #warning FIXME_GTK2
1501 /* Should we always ensure to convert? */
1502         locale = conv_get_current_locale();
1503
1504         if (locale && !strncasecmp(locale, "ja", 2)) {
1505                 buflen = strlen(str) * 2 + 1;
1506                 Xalloca(buf, buflen, return);
1507                 conv_anytodisp(buf, buflen, str);
1508                 unmime_header(str, buf);
1509         } else {
1510                 buflen = strlen(str) + 1;
1511                 Xalloca(buf, buflen, return);
1512                 unmime_header(buf, str);
1513                 strncpy2(str, buf, buflen);
1514         }
1515 }
1516
1517 void conv_unmime_header(gchar *outbuf, gint outlen, const gchar *str,
1518                         const gchar *charset)
1519 {
1520         CharSet cur_charset;
1521         const gchar *locale;
1522
1523         cur_charset = conv_get_current_charset();
1524
1525 #warning FIXME_GTK2
1526 /* Should we always ensure to convert? */
1527         locale = conv_get_current_locale();
1528
1529         if (locale && !strncasecmp(locale, "ja", 2)) {
1530                 gchar *buf;
1531                 gint buflen;
1532
1533                 buflen = strlen(str) * 2 + 1;
1534                 Xalloca(buf, buflen, return);
1535                 conv_anytodisp(buf, buflen, str);
1536                 unmime_header(outbuf, buf);
1537         } else
1538                 unmime_header(outbuf, str);
1539 }
1540
/*
 * Constants and helpers for MIME header encoding.
 *
 * MAX_LINELEN      line width that LBREAK_IF_REQUIRED folds at.
 * MAX_HARD_LINELEN second, larger line length limit (not referenced by
 *                  the macros below).
 * MIMESEP_BEGIN /
 * MIMESEP_END      opening and closing delimiters of an encoded word.
 *
 * B64LEN(len) evaluates to the number of characters needed to base64
 * encode len bytes: four per group of three bytes, rounded up.
 *
 * LBREAK_IF_REQUIRED(cond, is_plain_text) expands inside a function that
 * has the locals dest, len, destp, srcp and left in scope:
 *  - if fewer than MAX_LINELEN + 2 bytes remain in dest, the output is
 *    terminated and the enclosing function returns;
 *  - otherwise, if cond holds, input remains, something has already been
 *    written and less than MAX_LINELEN - 1 is left on the current line,
 *    a whitespace character is dropped (the one just written, or for
 *    plain text the next input one) and a line break followed by a
 *    single space is emitted, resetting left to MAX_LINELEN - 1.
 */
#define MAX_LINELEN             76
#define MAX_HARD_LINELEN        996
#define MIMESEP_BEGIN           "=?"
#define MIMESEP_END             "?="

#define B64LEN(len)     ((len) / 3 * 4 + ((len) % 3 ? 4 : 0))

#define LBREAK_IF_REQUIRED(cond, is_plain_text)                         \
{                                                                       \
        if (len - (destp - (guchar *)dest) < MAX_LINELEN + 2) {         \
                *destp = '\0';                                          \
                return;                                                 \
        }                                                               \
                                                                        \
        if ((cond) && *srcp) {                                          \
                if (destp > (guchar *)dest && left < MAX_LINELEN - 1) { \
                        if (isspace(*(destp - 1)))                      \
                                destp--;                                \
                        else if (is_plain_text && isspace(*srcp))       \
                                srcp++;                                 \
                        if (*srcp) {                                    \
                                *destp++ = '\n';                        \
                                *destp++ = ' ';                         \
                                left = MAX_LINELEN - 1;                 \
                        }                                               \
                }                                                       \
        }                                                               \
}
1569
1570 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1571                         gint header_len, gboolean addr_field)
1572 {
1573         const gchar *cur_encoding;
1574         const gchar *out_encoding;
1575         gint mimestr_len;
1576         gchar *mimesep_enc;
1577         gint left;
1578         const guchar *srcp = src;
1579         guchar *destp = dest;
1580         gboolean use_base64;
1581
1582         if (MB_CUR_MAX > 1) {
1583                 use_base64 = TRUE;
1584                 mimesep_enc = "?B?";
1585         } else {
1586                 use_base64 = FALSE;
1587                 mimesep_enc = "?Q?";
1588         }
1589
1590         cur_encoding = conv_get_current_charset_str();
1591         if (!strcmp(cur_encoding, CS_US_ASCII))
1592                 cur_encoding = CS_ISO_8859_1;
1593         out_encoding = conv_get_outgoing_charset_str();
1594         if (!strcmp(out_encoding, CS_US_ASCII))
1595                 out_encoding = CS_ISO_8859_1;
1596
1597         mimestr_len = strlen(MIMESEP_BEGIN) + strlen(out_encoding) +
1598                 strlen(mimesep_enc) + strlen(MIMESEP_END);
1599
1600         left = MAX_LINELEN - header_len;
1601
1602         while (*srcp) {
1603                 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1604
1605                 while (isspace(*srcp)) {
1606                         *destp++ = *srcp++;
1607                         left--;
1608                         LBREAK_IF_REQUIRED(left <= 0, TRUE);
1609                 }
1610
1611                 /* output as it is if the next word is ASCII string */
1612                 if (!is_next_nonascii(srcp)) {
1613                         gint word_len;
1614
1615                         word_len = get_next_word_len(srcp);
1616                         LBREAK_IF_REQUIRED(left < word_len, TRUE);
1617                         while (word_len > 0) {
1618                                 LBREAK_IF_REQUIRED(left + (MAX_HARD_LINELEN - MAX_LINELEN) <= 0, TRUE)
1619                                 *destp++ = *srcp++;
1620                                 left--;
1621                                 word_len--;
1622                         }
1623
1624                         continue;
1625                 }
1626
1627                 /* don't include parentheses in encoded strings */
1628                 if (addr_field && (*srcp == '(' || *srcp == ')')) {
1629                         LBREAK_IF_REQUIRED(left < 2, FALSE);
1630                         *destp++ = *srcp++;
1631                         left--;
1632                 }
1633
1634                 while (1) {
1635                         gint mb_len = 0;
1636                         gint cur_len = 0;
1637                         gchar *part_str;
1638                         gchar *out_str;
1639                         gchar *enc_str;
1640                         const guchar *p = srcp;
1641                         gint out_str_len;
1642                         gint out_enc_str_len;
1643                         gint mime_block_len;
1644                         gboolean cont = FALSE;
1645
1646                         while (*p != '\0') {
1647                                 if (isspace(*p) && !is_next_nonascii(p + 1))
1648                                         break;
1649                                 /* don't include parentheses in encoded
1650                                    strings */
1651                                 if (addr_field && (*p == '(' || *p == ')'))
1652                                         break;
1653
1654                                 if (MB_CUR_MAX > 1) {
1655                                         mb_len = mblen(p, MB_CUR_MAX);
1656                                         if (mb_len < 0) {
1657                                                 g_warning("conv_encode_header(): invalid multibyte character encountered\n");
1658                                                 mb_len = 1;
1659                                         }
1660                                 } else
1661                                         mb_len = 1;
1662
1663                                 Xstrndup_a(part_str, srcp, cur_len + mb_len, );
1664                                 out_str = conv_codeset_strdup
1665                                         (part_str, cur_encoding, out_encoding);
1666                                 if (!out_str) {
1667                                         g_warning("conv_encode_header(): code conversion failed\n");
1668                                         conv_unreadable_8bit(part_str);
1669                                         out_str = g_strdup(part_str);
1670                                 }
1671                                 out_str_len = strlen(out_str);
1672
1673                                 if (use_base64)
1674                                         out_enc_str_len = B64LEN(out_str_len);
1675                                 else
1676                                         out_enc_str_len =
1677                                                 qp_get_q_encoding_len(out_str);
1678
1679                                 g_free(out_str);
1680
1681                                 if (mimestr_len + out_enc_str_len <= left) {
1682                                         cur_len += mb_len;
1683                                         p += mb_len;
1684                                 } else if (cur_len == 0) {
1685                                         LBREAK_IF_REQUIRED(1, FALSE);
1686                                         continue;
1687                                 } else {
1688                                         cont = TRUE;
1689                                         break;
1690                                 }
1691                         }
1692
1693                         if (cur_len > 0) {
1694                                 Xstrndup_a(part_str, srcp, cur_len, );
1695                                 out_str = conv_codeset_strdup
1696                                         (part_str, cur_encoding, out_encoding);
1697                                 if (!out_str) {
1698                                         g_warning("conv_encode_header(): code conversion failed\n");
1699                                         conv_unreadable_8bit(part_str);
1700                                         out_str = g_strdup(part_str);
1701                                 }
1702                                 out_str_len = strlen(out_str);
1703
1704                                 if (use_base64)
1705                                         out_enc_str_len = B64LEN(out_str_len);
1706                                 else
1707                                         out_enc_str_len =
1708                                                 qp_get_q_encoding_len(out_str);
1709
1710                                 Xalloca(enc_str, out_enc_str_len + 1, );
1711                                 if (use_base64)
1712                                         base64_encode(enc_str, out_str, out_str_len);
1713                                 else
1714                                         qp_q_encode(enc_str, out_str);
1715
1716                                 g_free(out_str);
1717
1718                                 /* output MIME-encoded string block */
1719                                 mime_block_len = mimestr_len + strlen(enc_str);
1720                                 g_snprintf(destp, mime_block_len + 1,
1721                                            MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
1722                                            out_encoding, mimesep_enc, enc_str);
1723                                 destp += mime_block_len;
1724                                 srcp += cur_len;
1725
1726                                 left -= mime_block_len;
1727                         }
1728
1729                         LBREAK_IF_REQUIRED(cont, FALSE);
1730
1731                         if (cur_len == 0)
1732                                 break;
1733                 }
1734         }
1735
1736         *destp = '\0';
1737 }
1738
1739 #undef LBREAK_IF_REQUIRED