src/codeconv.c

   1 /*
   2  * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
   3  * Copyright (C) 1999-2003 Hiroyuki Yamamoto
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 2 of the License, or
   8  * (at your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License
  16  * along with this program; if not, write to the Free Software
  17  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  18  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 #  include "config.h"
  22 #endif
  23
  24 #include <glib.h>
  25 #include <string.h>
  26 #include <ctype.h>
  27 #include <stdlib.h>
  28 #include <errno.h>
  29
  30 #if HAVE_LOCALE_H
  31 #  include <locale.h>
  32 #endif
  33
  34 #if HAVE_ICONV
  35 #  include <iconv.h>
  36 #endif
  37
  38 #include "intl.h"
  39 #include "codeconv.h"
  40 #include "unmime.h"
  41 #include "base64.h"
  42 #include "quoted-printable.h"
  43 #include "utils.h"
  44 #include "prefs_common.h"
  45
  46 typedef enum
  47 {
  48         JIS_ASCII,
  49         JIS_KANJI,
  50         JIS_HWKANA,
  51         JIS_AUXKANJI
  52 } JISState;
  53
  54 #define SUBST_CHAR      '_'
  55 #define ESC             '\033'
  56
  57 #define iseuckanji(c) \
  58         (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
  59 #define iseuchwkana1(c) \
  60         (((c) & 0xff) == 0x8e)
  61 #define iseuchwkana2(c) \
  62         (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
  63 #define iseucaux(c) \
  64         (((c) & 0xff) == 0x8f)
  65 #define issjiskanji1(c) \
  66         ((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
  67          (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xfc))
  68 #define issjiskanji2(c) \
  69         ((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) || \
  70          (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
  71 #define issjishwkana(c) \
  72         (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
  73
  74 #define K_IN()                          \
  75         if (state != JIS_KANJI) {       \
  76                 *out++ = ESC;           \
  77                 *out++ = '$';           \
  78                 *out++ = 'B';           \
  79                 state = JIS_KANJI;      \
  80         }
  81
  82 #define K_OUT()                         \
  83         if (state != JIS_ASCII) {       \
  84                 *out++ = ESC;           \
  85                 *out++ = '(';           \
  86                 *out++ = 'B';           \
  87                 state = JIS_ASCII;      \
  88         }
  89
  90 #define HW_IN()                         \
  91         if (state != JIS_HWKANA) {      \
  92                 *out++ = ESC;           \
  93                 *out++ = '(';           \
  94                 *out++ = 'I';           \
  95                 state = JIS_HWKANA;     \
  96         }
  97
  98 #define AUX_IN()                        \
  99         if (state != JIS_AUXKANJI) {    \
 100                 *out++ = ESC;           \
 101                 *out++ = '$';           \
 102                 *out++ = '(';           \
 103                 *out++ = 'D';           \
 104                 state = JIS_AUXKANJI;   \
 105         }
 106
 107 void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
 108 {
 109         const guchar *in = inbuf;
 110         guchar *out = outbuf;
 111         JISState state = JIS_ASCII;
 112
 113         while (*in != '\0') {
 114                 if (*in == ESC) {
 115                         in++;
 116                         if (*in == '$') {
 117                                 if (*(in + 1) == '@' || *(in + 1) == 'B') {
 118                                         state = JIS_KANJI;
 119                                         in += 2;
 120                                 } else if (*(in + 1) == '(' &&
 121                                            *(in + 2) == 'D') {
 122                                         state = JIS_AUXKANJI;
 123                                         in += 3;
 124                                 } else {
 125                                         /* unknown escape sequence */
 126                                         state = JIS_ASCII;
 127                                 }
 128                         } else if (*in == '(') {
 129                                 if (*(in + 1) == 'B' || *(in + 1) == 'J') {
 130                                         state = JIS_ASCII;
 131                                         in += 2;
 132                                 } else if (*(in + 1) == 'I') {
 133                                         state = JIS_HWKANA;
 134                                         in += 2;
 135                                 } else {
 136                                         /* unknown escape sequence */
 137                                         state = JIS_ASCII;
 138                                 }
 139                         } else {
 140                                 /* unknown escape sequence */
 141                                 state = JIS_ASCII;
 142                         }
 143                 } else if (*in == 0x0e) {
 144                         state = JIS_HWKANA;
 145                         in++;
 146                 } else if (*in == 0x0f) {
 147                         state = JIS_ASCII;
 148                         in++;
 149                 } else {
 150                         switch (state) {
 151                         case JIS_ASCII:
 152                                 *out++ = *in++;
 153                                 break;
 154                         case JIS_KANJI:
 155                                 *out++ = *in++ | 0x80;
 156                                 if (*in == '\0') break;
 157                                 *out++ = *in++ | 0x80;
 158                                 break;
 159                         case JIS_HWKANA:
 160                                 *out++ = 0x8e;
 161                                 *out++ = *in++ | 0x80;
 162                                 break;
 163                         case JIS_AUXKANJI:
 164                                 *out++ = 0x8f;
 165                                 *out++ = *in++ | 0x80;
 166                                 if (*in == '\0') break;
 167                                 *out++ = *in++ | 0x80;
 168                                 break;
 169                         }
 170                 }
 171         }
 172
 173         *out = '\0';
 174 }
 175
 176 void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
 177 {
 178         const guchar *in = inbuf;
 179         guchar *out = outbuf;
 180         JISState state = JIS_ASCII;
 181
 182         while (*in != '\0') {
 183                 if (isascii(*in)) {
 184                         K_OUT();
 185                         *out++ = *in++;
 186                 } else if (iseuckanji(*in)) {
 187                         if (iseuckanji(*(in + 1))) {
 188                                 K_IN();
 189                                 *out++ = *in++ & 0x7f;
 190                                 *out++ = *in++ & 0x7f;
 191                         } else {
 192                                 K_OUT();
 193                                 *out++ = SUBST_CHAR;
 194                                 in++;
 195                                 if (*in != '\0' && !isascii(*in)) {
 196                                         *out++ = SUBST_CHAR;
 197                                         in++;
 198                                 }
 199                         }
 200                 } else if (iseuchwkana1(*in)) {
 201                         in++;
 202                         if (iseuchwkana2(*in)) {
 203                                 HW_IN();
 204                                 *out++ = *in++ & 0x7f;
 205                         } else {
 206                                 K_OUT();
 207                                 if (*in != '\0' && !isascii(*in)) {
 208                                         *out++ = SUBST_CHAR;
 209                                         in++;
 210                                 }
 211                         }
 212                 } else if (iseucaux(*in)) {
 213                         in++;
 214                         if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
 215                                 AUX_IN();
 216                                 *out++ = *in++ & 0x7f;
 217                                 *out++ = *in++ & 0x7f;
 218                         } else {
 219                                 K_OUT();
 220                                 if (*in != '\0' && !isascii(*in)) {
 221                                         *out++ = SUBST_CHAR;
 222                                         in++;
 223                                         if (*in != '\0' && !isascii(*in)) {
 224                                                 *out++ = SUBST_CHAR;
 225                                                 in++;
 226                                         }
 227                                 }
 228                         }
 229                 } else {
 230                         K_OUT();
 231                         *out++ = SUBST_CHAR;
 232                         in++;
 233                 }
 234         }
 235
 236         K_OUT();
 237         *out = '\0';
 238 }
 239
 240 void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
 241 {
 242         const guchar *in = inbuf;
 243         guchar *out = outbuf;
 244
 245         while (*in != '\0') {
 246                 if (isascii(*in)) {
 247                         *out++ = *in++;
 248                 } else if (issjiskanji1(*in)) {
 249                         if (issjiskanji2(*(in + 1))) {
 250                                 guchar out1 = *in;
 251                                 guchar out2 = *(in + 1);
 252                                 guchar row;
 253
 254                                 row = out1 < 0xa0 ? 0x70 : 0xb0;
 255                                 if (out2 < 0x9f) {
 256                                         out1 = (out1 - row) * 2 - 1;
 257                                         out2 -= out2 > 0x7f ? 0x20 : 0x1f;
 258                                 } else {
 259                                         out1 = (out1 - row) * 2;
 260                                         out2 -= 0x7e;
 261                                 }
 262
 263                                 *out++ = out1 | 0x80;
 264                                 *out++ = out2 | 0x80;
 265                                 in += 2;
 266                         } else {
 267                                 *out++ = SUBST_CHAR;
 268                                 in++;
 269                                 if (*in != '\0' && !isascii(*in)) {
 270                                         *out++ = SUBST_CHAR;
 271                                         in++;
 272                                 }
 273                         }
 274                 } else if (issjishwkana(*in)) {
 275                         *out++ = 0x8e;
 276                         *out++ = *in++;
 277                 } else {
 278                         *out++ = SUBST_CHAR;
 279                         in++;
 280                 }
 281         }
 282
 283         *out = '\0';
 284 }
 285
 286 void conv_anytoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
 287 {
 288         switch (conv_guess_ja_encoding(inbuf)) {
 289         case C_ISO_2022_JP:
 290                 conv_jistoeuc(outbuf, outlen, inbuf);
 291                 break;
 292         case C_SHIFT_JIS:
 293                 conv_sjistoeuc(outbuf, outlen, inbuf);
 294                 break;
 295         default:
 296                 strncpy2(outbuf, inbuf, outlen);
 297                 break;
 298         }
 299 }
 300
 301 void conv_anytojis(gchar *outbuf, gint outlen, const gchar *inbuf)
 302 {
 303         switch (conv_guess_ja_encoding(inbuf)) {
 304         case C_EUC_JP:
 305                 conv_euctojis(outbuf, outlen, inbuf);
 306                 break;
 307         default:
 308                 strncpy2(outbuf, inbuf, outlen);
 309                 break;
 310         }
 311 }
 312
 313 static gchar valid_eucjp_tbl[][96] = {
 314         /* 0xa2a0 - 0xa2ff */
 315         { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 0,
 316           0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 1, 1, 1, 1, 1, 1,
 317           1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 1, 1, 1, 1, 1, 1,
 318           1, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 1, 1, 1, 1,
 319           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0,
 320           0, 0, 1, 1, 1, 1, 1, 1,  1, 1, 0, 0, 0, 0, 1, 0 },
 321
 322         /* 0xa3a0 - 0xa3ff */
 323         { 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
 324           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 0, 0, 0, 0, 0, 0,
 325           0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 326           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0,
 327           0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 328           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0 },
 329
 330         /* 0xa4a0 - 0xa4ff */
 331         { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 332           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 333           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 334           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 335           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 336           1, 1, 1, 1, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },
 337
 338         /* 0xa5a0 - 0xa5ff */
 339         { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 340           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 341           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 342           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 343           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 344           1, 1, 1, 1, 1, 1, 1, 0,  0, 0, 0, 0, 0, 0, 0, 0 },
 345
 346         /* 0xa6a0 - 0xa6ff */
 347         { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 348           1, 1, 1, 1, 1, 1, 1, 1,  1, 0, 0, 0, 0, 0, 0, 0,
 349           0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 350           1, 1, 1, 1, 1, 1, 1, 1,  1, 0, 0, 0, 0, 0, 0, 0,
 351           0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
 352           0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },
 353
 354         /* 0xa7a0 - 0xa7ff */
 355         { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 356           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 357           1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
 358           0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 359           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 360           1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },
 361
 362         /* 0xa8a0 - 0xa8ff */
 363         { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 364           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
 365           1, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
 366           0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
 367           0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
 368           0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 }
 369 };
 370
 371 static gboolean isprintableeuckanji(guchar c1, guchar c2)
 372 {
 373         if (c1 <= 0xa0 || c1 >= 0xf5)
 374                 return FALSE;
 375         if (c2 <= 0xa0 || c2 == 0xff)
 376                 return FALSE;
 377
 378         if (c1 >= 0xa9 && c1 <= 0xaf)
 379                 return FALSE;
 380
 381         if (c1 >= 0xa2 && c1 <= 0xa8)
 382                 return (gboolean)valid_eucjp_tbl[c1 - 0xa2][c2 - 0xa0];
 383
 384         if (c1 == 0xcf) {
 385                 if (c2 >= 0xd4 && c2 <= 0xff)
 386                         return FALSE;
 387         } else if (c1 == 0xf4) {
 388                 if (c2 >= 0xa7 && c2 <= 0xff)
 389                         return FALSE;
 390         }
 391
 392         return TRUE;
 393 }
 394
 395 void conv_unreadable_eucjp(gchar *str)
 396 {
 397         register guchar *p = str;
 398
 399         while (*p != '\0') {
 400                 if (isascii(*p)) {
 401                         /* convert CR+LF -> LF */
 402                         if (*p == '\r' && *(p + 1) == '\n')
 403                                 memmove(p, p + 1, strlen(p));
 404                         /* printable 7 bit code */
 405                         p++;
 406                 } else if (iseuckanji(*p)) {
 407                         if (isprintableeuckanji(*p, *(p + 1))) {
 408                                 /* printable euc-jp code */
 409                                 p += 2;
 410                         } else {
 411                                 /* substitute unprintable code */
 412                                 *p++ = SUBST_CHAR;
 413                                 if (*p != '\0') {
 414                                         if (isascii(*p))
 415                                                 p++;
 416                                         else
 417                                                 *p++ = SUBST_CHAR;
 418                                 }
 419                         }
 420                 } else if (iseuchwkana1(*p)) {
 421                         if (iseuchwkana2(*(p + 1)))
 422                                 /* euc-jp hankaku kana */
 423                                 p += 2;
 424                         else
 425                                 *p++ = SUBST_CHAR;
 426                 } else if (iseucaux(*p)) {
 427                         if (iseuckanji(*(p + 1)) && iseuckanji(*(p + 2))) {
 428                                 /* auxiliary kanji */
 429                                 p += 3;
 430                         } else
 431                                 *p++ = SUBST_CHAR;
 432                 } else
 433                         /* substitute unprintable 1 byte code */
 434                         *p++ = SUBST_CHAR;
 435         }
 436 }
 437
 438 void conv_unreadable_8bit(gchar *str)
 439 {
 440         register guchar *p = str;
 441
 442         while (*p != '\0') {
 443                 /* convert CR+LF -> LF */
 444                 if (*p == '\r' && *(p + 1) == '\n')
 445                         memmove(p, p + 1, strlen(p));
 446                 else if (!isascii(*p)) *p = SUBST_CHAR;
 447                 p++;
 448         }
 449 }
 450
 451 void conv_unreadable_latin(gchar *str)
 452 {
 453         register guchar *p = str;
 454
 455         while (*p != '\0') {
 456                 /* convert CR+LF -> LF */
 457                 if (*p == '\r' && *(p + 1) == '\n')
 458                         memmove(p, p + 1, strlen(p));
 459                 else if ((*p & 0xff) >= 0x7f && (*p & 0xff) <= 0x9f)
 460                         *p = SUBST_CHAR;
 461                 p++;
 462         }
 463 }
 464
 465 #define NCV     '\0'
 466
 467 void conv_mb_alnum(gchar *str)
 468 {
 469         static guchar char_tbl[] = {
 470                 /* 0xa0 - 0xaf */
 471                 NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
 472                 ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
 473                 /* 0xb0 - 0xbf */
 474                 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
 475                 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
 476                 /* 0xc0 - 0xcf */
 477                 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
 478                 NCV, NCV, '(', ')', NCV, NCV, '[', ']',
 479                 /* 0xd0 - 0xdf */
 480                 '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
 481                 NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
 482                 /* 0xe0 - 0xef */
 483                 NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
 484                 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
 485         };
 486
 487         register guchar *p = str;
 488         register gint len;
 489
 490         len = strlen(str);
 491
 492         while (len > 1) {
 493                 if (*p == 0xa3) {
 494                         register guchar ch = *(p + 1);
 495
 496                         if (ch >= 0xb0 && ch <= 0xfa) {
 497                                 /* [a-zA-Z] */
 498                                 *p = ch & 0x7f;
 499                                 p++;
 500                                 len--;
 501                                 memmove(p, p + 1, len);
 502                                 len--;
 503                         } else  {
 504                                 p += 2;
 505                                 len -= 2;
 506                         }
 507                 } else if (*p == 0xa1) {
 508                         register guchar ch = *(p + 1);
 509
 510                         if (ch >= 0xa0 && ch <= 0xef &&
 511                             NCV != char_tbl[ch - 0xa0]) {
 512                                 *p = char_tbl[ch - 0xa0];
 513                                 p++;
 514                                 len--;
 515                                 memmove(p, p + 1, len);
 516                                 len--;
 517                         } else {
 518                                 p += 2;
 519                                 len -= 2;
 520                         }
 521                 } else if (iseuckanji(*p)) {
 522                         p += 2;
 523                         len -= 2;
 524                 } else {
 525                         p++;
 526                         len--;
 527                 }
 528         }
 529 }
 530
 531 CharSet conv_guess_ja_encoding(const gchar *str)
 532 {
 533         const guchar *p = str;
 534         CharSet guessed = C_US_ASCII;
 535
 536         while (*p != '\0') {
 537                 if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
 538                         if (guessed == C_US_ASCII)
 539                                 return C_ISO_2022_JP;
 540                         p += 2;
 541                 } else if (isascii(*p)) {
 542                         p++;
 543                 } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
 544                         if (*p >= 0xfd && *p <= 0xfe)
 545                                 return C_EUC_JP;
 546                         else if (guessed == C_SHIFT_JIS) {
 547                                 if ((issjiskanji1(*p) &&
 548                                      issjiskanji2(*(p + 1))) ||
 549                                     issjishwkana(*p))
 550                                         guessed = C_SHIFT_JIS;
 551                                 else
 552                                         guessed = C_EUC_JP;
 553                         } else
 554                                 guessed = C_EUC_JP;
 555                         p += 2;
 556                 } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
 557                         if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
 558                                 guessed = C_SHIFT_JIS;
 559                         else
 560                                 return C_SHIFT_JIS;
 561                         p += 2;
 562                 } else if (issjishwkana(*p)) {
 563                         guessed = C_SHIFT_JIS;
 564                         p++;
 565                 } else {
 566                         p++;
 567                 }
 568         }
 569
 570         return guessed;
 571 }
 572
 573 void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 574 {
 575         conv_jistoeuc(outbuf, outlen, inbuf);
 576         conv_unreadable_eucjp(outbuf);
 577 }
 578
 579 void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 580 {
 581         conv_sjistoeuc(outbuf, outlen, inbuf);
 582         conv_unreadable_eucjp(outbuf);
 583 }
 584
 585 void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 586 {
 587         strncpy2(outbuf, inbuf, outlen);
 588         conv_unreadable_eucjp(outbuf);
 589 }
 590
 591 void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 592 {
 593         conv_anytoeuc(outbuf, outlen, inbuf);
 594         conv_unreadable_eucjp(outbuf);
 595 }
 596
 597 void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 598 {
 599         strncpy2(outbuf, inbuf, outlen);
 600         conv_unreadable_8bit(outbuf);
 601 }
 602
 603 void conv_latintodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 604 {
 605         strncpy2(outbuf, inbuf, outlen);
 606         conv_unreadable_latin(outbuf);
 607 }
 608
 609 void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
 610 {
 611         strncpy2(outbuf, inbuf, outlen);
 612 }
 613
 614 void conv_localetodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 615 {
 616         strncpy2(outbuf, inbuf, outlen);
 617
 618         switch (conv_get_current_charset()) {
 619         case C_US_ASCII:
 620         case C_ISO_8859_1:
 621         case C_ISO_8859_2:
 622         case C_ISO_8859_4:
 623         case C_ISO_8859_5:
 624         case C_ISO_8859_7:
 625         case C_ISO_8859_8:
 626         case C_ISO_8859_9:
 627         case C_ISO_8859_11:
 628         case C_ISO_8859_13:
 629         case C_ISO_8859_15:
 630                 conv_unreadable_latin(outbuf);
 631                 break;
 632         case C_EUC_JP:
 633                 conv_unreadable_eucjp(outbuf);
 634                 break;
 635         default:
 636                 break;
 637         }
 638 }
 639
 640 CodeConverter *conv_code_converter_new(const gchar *charset)
 641 {
 642         CodeConverter *conv;
 643
 644         conv = g_new0(CodeConverter, 1);
 645         conv->code_conv_func = conv_get_code_conv_func(charset, NULL);
 646         conv->charset_str = g_strdup(charset);
 647         conv->charset = conv_get_charset_from_str(charset);
 648
 649         return conv;
 650 }
 651
 652 void conv_code_converter_destroy(CodeConverter *conv)
 653 {
 654         g_free(conv->charset_str);
 655         g_free(conv);
 656 }
 657
 658 gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
 659                   const gchar *inbuf)
 660 {
 661 #if HAVE_ICONV
 662         if (conv->code_conv_func != conv_noconv)
 663                 conv->code_conv_func(outbuf, outlen, inbuf);
 664         else {
 665                 gchar *str;
 666
 667                 str = conv_iconv_strdup(inbuf, conv->charset_str, NULL);
 668                 if (!str)
 669                         return -1;
 670                 else {
 671                         strncpy2(outbuf, str, outlen);
 672                         g_free(str);
 673                 }
 674         }
 675 #else /* !HAVE_ICONV */
 676         conv->code_conv_func(outbuf, outlen, inbuf);
 677 #endif
 678
 679         return 0;
 680 }
 681
 682 gchar *conv_codeset_strdup(const gchar *inbuf,
 683                            const gchar *src_code, const gchar *dest_code)
 684 {
 685         gchar *buf;
 686         size_t len;
 687         CodeConvFunc conv_func;
 688
 689         conv_func = conv_get_code_conv_func(src_code, dest_code);
 690         if (conv_func != conv_noconv) {
 691                 len = (strlen(inbuf) + 1) * 3;
 692                 buf = g_malloc(len);
 693                 if (!buf) return NULL;
 694
 695                 conv_func(buf, len, inbuf);
 696                 return g_realloc(buf, strlen(buf) + 1);
 697         }
 698
 699 #if HAVE_ICONV
 700         return conv_iconv_strdup(inbuf, src_code, dest_code);
 701 #else
 702         return g_strdup(inbuf);
 703 #endif /* HAVE_ICONV */
 704 }
 705
 706 CodeConvFunc conv_get_code_conv_func(const gchar *src_charset_str,
 707                                      const gchar *dest_charset_str)
 708 {
 709         CodeConvFunc code_conv = conv_noconv;
 710         CharSet src_charset;
 711         CharSet dest_charset;
 712
 713         if (!src_charset_str)
 714                 src_charset = conv_get_current_charset();
 715         else
 716                 src_charset = conv_get_charset_from_str(src_charset_str);
 717
 718         /* auto detection mode */
 719         if (!src_charset_str && !dest_charset_str) {
 720                 if (src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS)
 721                         return conv_anytodisp;
 722                 else
 723                         return conv_noconv;
 724         }
 725
 726         dest_charset = conv_get_charset_from_str(dest_charset_str);
 727
 728         if (dest_charset == C_US_ASCII)
 729                 return conv_ustodisp;
 730         else if (dest_charset == C_UTF_8 ||
 731                  (dest_charset == C_AUTO &&
 732                   conv_get_current_charset() == C_UTF_8))
 733                 return conv_noconv;
 734
 735         switch (src_charset) {
 736         case C_ISO_2022_JP:
 737         case C_ISO_2022_JP_2:
 738                 if (dest_charset == C_AUTO)
 739                         code_conv = conv_jistodisp;
 740                 else if (dest_charset == C_EUC_JP)
 741                         code_conv = conv_jistoeuc;
 742                 break;
 743         case C_US_ASCII:
 744                 if (dest_charset == C_AUTO)
 745                         code_conv = conv_ustodisp;
 746                 break;
 747         case C_ISO_8859_1:
 748         case C_ISO_8859_2:
 749         case C_ISO_8859_4:
 750         case C_ISO_8859_5:
 751         case C_ISO_8859_7:
 752         case C_ISO_8859_8:
 753         case C_ISO_8859_9:
 754         case C_ISO_8859_11:
 755         case C_ISO_8859_13:
 756         case C_ISO_8859_15:
 757                 if (dest_charset == C_AUTO)
 758                         code_conv = conv_latintodisp;
 759                 break;
 760         case C_SHIFT_JIS:
 761                 if (dest_charset == C_AUTO)
 762                         code_conv = conv_sjistodisp;
 763                 else if (dest_charset == C_EUC_JP)
 764                         code_conv = conv_sjistoeuc;
 765                 break;
 766         case C_EUC_JP:
 767                 if (dest_charset == C_AUTO)
 768                         code_conv = conv_euctodisp;
 769                 else if (dest_charset == C_ISO_2022_JP ||
 770                          dest_charset == C_ISO_2022_JP_2)
 771                         code_conv = conv_euctojis;
 772                 break;
 773         default:
 774                 break;
 775         }
 776
 777         return code_conv;
 778 }
 779
 780 #if HAVE_ICONV
 781 gchar *conv_iconv_strdup(const gchar *inbuf,
 782                          const gchar *src_code, const gchar *dest_code)
 783 {
 784         iconv_t cd;
 785         const gchar *inbuf_p;
 786         gchar *outbuf;
 787         gchar *outbuf_p;
 788         gint in_size;
 789         gint in_left;
 790         gint out_size;
 791         gint out_left;
 792         gint n_conv;
 793
 794         if (!src_code)
 795                 src_code = conv_get_outgoing_charset_str();
 796         if (!dest_code)
 797                 dest_code = conv_get_current_charset_str();
 798
 799         /* don't convert if current codeset is US-ASCII */
 800         if (!strcasecmp(dest_code, CS_US_ASCII))
 801                 return g_strdup(inbuf);
 802
 803         /* don't convert if src and dest codeset are identical */
 804         if (!strcasecmp(src_code, dest_code))
 805                 return g_strdup(inbuf);
 806
 807         cd = iconv_open(dest_code, src_code);
 808         if (cd == (iconv_t)-1)
 809                 return NULL;
 810
 811         inbuf_p = inbuf;
 812         in_size = strlen(inbuf) + 1;
 813         in_left = in_size;
 814         out_size = in_size * 2;
 815         outbuf = g_malloc(out_size);
 816         outbuf_p = outbuf;
 817         out_left = out_size;
 818
 819         while ((n_conv = iconv(cd, (ICONV_CONST gchar **)&inbuf_p, &in_left,
 820                                &outbuf_p, &out_left)) < 0) {
 821                 if (EILSEQ == errno) {
 822                         inbuf_p++;
 823                         in_left--;
 824                         *outbuf_p++ = SUBST_CHAR;
 825                         out_left--;
 826                 } else if (EINVAL == errno) {
 827                         *outbuf_p = '\0';
 828                         break;
 829                 } else if (E2BIG == errno) {
 830                         out_size *= 2;
 831                         outbuf = g_realloc(outbuf, out_size);
 832                         inbuf_p = inbuf;
 833                         in_left = in_size;
 834                         outbuf_p = outbuf;
 835                         out_left = out_size;
 836                 } else {
 837                         g_warning("conv_iconv_strdup(): %s\n",
 838                                   g_strerror(errno));
 839                         *outbuf_p = '\0';
 840                         break;
 841                 }
 842         }
 843
 844         iconv(cd, NULL, NULL, &outbuf_p, &out_left);
 845         outbuf = g_realloc(outbuf, strlen(outbuf) + 1);
 846
 847         iconv_close(cd);
 848
 849         return outbuf;
 850 }
 851 #endif /* HAVE_ICONV */
 852
/*
 * Table pairing CharSet values with charset name strings.
 *
 * A CharSet may be listed several times with different names.  When the
 * CharSet -> name table is built (conv_get_charset_to_str_table()) only the
 * first entry for a given CharSet is kept, so that entry is the name
 * returned by conv_get_charset_str().  Every name is registered for the
 * case-insensitive name -> CharSet lookup
 * (conv_get_charset_from_str_table()).
 */
static const struct {
        CharSet charset;
        gchar *const name;
} charsets[] = {
        {C_US_ASCII,            CS_US_ASCII},
        {C_US_ASCII,            CS_ANSI_X3_4_1968},
        {C_UTF_8,               CS_UTF_8},
        {C_ISO_8859_1,          CS_ISO_8859_1},
        {C_ISO_8859_2,          CS_ISO_8859_2},
        {C_ISO_8859_4,          CS_ISO_8859_4},
        {C_ISO_8859_5,          CS_ISO_8859_5},
        {C_ISO_8859_7,          CS_ISO_8859_7},
        {C_ISO_8859_8,          CS_ISO_8859_8},
        {C_ISO_8859_9,          CS_ISO_8859_9},
        {C_ISO_8859_11,         CS_ISO_8859_11},
        {C_ISO_8859_13,         CS_ISO_8859_13},
        {C_ISO_8859_15,         CS_ISO_8859_15},
        {C_BALTIC,              CS_BALTIC},
        {C_CP1251,              CS_CP1251},
        {C_WINDOWS_1251,        CS_WINDOWS_1251},
        {C_KOI8_R,              CS_KOI8_R},
        {C_KOI8_U,              CS_KOI8_U},
        {C_ISO_2022_JP,         CS_ISO_2022_JP},
        {C_ISO_2022_JP_2,       CS_ISO_2022_JP_2},
        {C_EUC_JP,              CS_EUC_JP},
        {C_EUC_JP,              CS_EUCJP},
        {C_SHIFT_JIS,           CS_SHIFT_JIS},
        {C_SHIFT_JIS,           CS_SHIFT__JIS},
        {C_SHIFT_JIS,           CS_SJIS},
        {C_ISO_2022_KR,         CS_ISO_2022_KR},
        {C_EUC_KR,              CS_EUC_KR},
        {C_ISO_2022_CN,         CS_ISO_2022_CN},
        {C_EUC_CN,              CS_EUC_CN},
        {C_GB2312,              CS_GB2312},
        {C_EUC_TW,              CS_EUC_TW},
        {C_BIG5,                CS_BIG5},
        {C_TIS_620,             CS_TIS_620},
        {C_WINDOWS_874,         CS_WINDOWS_874},
};
 892
/*
 * Table mapping locale names to charsets.
 *
 *   locale      - locale name (optionally with a codeset suffix)
 *   charset     - value returned by conv_get_current_charset()
 *   out_charset - value selected by conv_get_outgoing_charset()
 *
 * The lookups walk the table from the top and take the first entry whose
 * `locale' string is a case-insensitive prefix of the current locale, so an
 * entry with a codeset suffix ("ja_JP.SJIS") must be listed before the bare
 * entry for the same language/territory ("ja_JP").
 */
static const struct {
        gchar *const locale;
        CharSet charset;
        CharSet out_charset;
} locale_table[] = {
        {"ja_JP.eucJP"  , C_EUC_JP      , C_ISO_2022_JP},
        {"ja_JP.ujis"   , C_EUC_JP      , C_ISO_2022_JP},
        {"ja_JP.EUC"    , C_EUC_JP      , C_ISO_2022_JP},
        {"ja_JP.SJIS"   , C_SHIFT_JIS   , C_ISO_2022_JP},
        {"ja_JP.JIS"    , C_ISO_2022_JP , C_ISO_2022_JP},
        {"ja_JP"        , C_EUC_JP      , C_ISO_2022_JP},
        {"ko_KR"        , C_EUC_KR      , C_EUC_KR},
        {"zh_CN.GB2312" , C_GB2312      , C_GB2312},
        {"zh_CN"        , C_GB2312      , C_GB2312},
        {"zh_TW.eucTW"  , C_EUC_TW      , C_BIG5},
        {"zh_TW.Big5"   , C_BIG5        , C_BIG5},
        {"zh_TW"        , C_BIG5        , C_BIG5},

        {"ru_RU.KOI8-R" , C_KOI8_R      , C_KOI8_R},
        {"ru_RU.CP1251" , C_WINDOWS_1251, C_KOI8_R},
        {"ru_RU"        , C_ISO_8859_5  , C_KOI8_R},
        {"ru_UA"        , C_KOI8_U      , C_KOI8_U},
        {"uk_UA"        , C_KOI8_U      , C_KOI8_U},
        {"be_BY"        , C_WINDOWS_1251, C_WINDOWS_1251},
        {"bg_BG"        , C_WINDOWS_1251, C_WINDOWS_1251},

        {"en_US"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"ca_ES"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"da_DK"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"de_DE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"nl_NL"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"et_EE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"fi_FI"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"fr_FR"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"is_IS"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"it_IT"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"no_NO"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"pt_PT"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"pt_BR"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_ES"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"sv_SE"        , C_ISO_8859_1  , C_ISO_8859_1},

        {"hr_HR"        , C_ISO_8859_2  , C_ISO_8859_2},
        {"hu_HU"        , C_ISO_8859_2  , C_ISO_8859_2},
        {"pl_PL"        , C_ISO_8859_2  , C_ISO_8859_2},
        {"ro_RO"        , C_ISO_8859_2  , C_ISO_8859_2},
        {"sk_SK"        , C_ISO_8859_2  , C_ISO_8859_2},
        {"sl_SI"        , C_ISO_8859_2  , C_ISO_8859_2},
        {"el_GR"        , C_ISO_8859_7  , C_ISO_8859_7},
        {"iw_IL"        , C_ISO_8859_8  , C_ISO_8859_8},
        {"tr_TR"        , C_ISO_8859_9  , C_ISO_8859_9},

        {"th_TH"        , C_TIS_620     , C_TIS_620},
        /* alternative Thai mappings, currently disabled: */
        /* {"th_TH"     , C_WINDOWS_874}, */
        /* {"th_TH"     , C_ISO_8859_11}, */

        {"lt_LT.iso88594"       , C_ISO_8859_4  , C_ISO_8859_4},
        {"lt_LT.ISO8859-4"      , C_ISO_8859_4  , C_ISO_8859_4},
        {"lt_LT.ISO_8859-4"     , C_ISO_8859_4  , C_ISO_8859_4},
        {"lt_LT"                , C_ISO_8859_13 , C_ISO_8859_13},
        {"lv_LV"                , C_ISO_8859_13 , C_ISO_8859_13},

        {"C"                    , C_US_ASCII    , C_US_ASCII},
        {"POSIX"                , C_US_ASCII    , C_US_ASCII},
        {"ANSI_X3.4-1968"       , C_US_ASCII    , C_US_ASCII},
};
 959
 960 static GHashTable *conv_get_charset_to_str_table(void)
 961 {
 962         static GHashTable *table;
 963         gint i;
 964
 965         if (table)
 966                 return table;
 967
 968         table = g_hash_table_new(NULL, g_direct_equal);
 969
 970         for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
 971                 if (g_hash_table_lookup(table, GUINT_TO_POINTER(charsets[i].charset))
 972                     == NULL) {
 973                         g_hash_table_insert
 974                                 (table, GUINT_TO_POINTER(charsets[i].charset),
 975                                  charsets[i].name);
 976                 }
 977         }
 978
 979         return table;
 980 }
 981
 982 static gint str_case_equal(gconstpointer v, gconstpointer v2)
 983 {
 984         return strcasecmp((const gchar *)v, (const gchar *)v2) == 0;
 985 }
 986
 987 static guint str_case_hash(gconstpointer key)
 988 {
 989         const gchar *p = key;
 990         guint h = *p;
 991
 992         if (h) {
 993                 h = tolower(h);
 994                 for (p += 1; *p != '\0'; p++)
 995                         h = (h << 5) - h + tolower(*p);
 996         }
 997
 998         return h;
 999 }
1000
1001 static GHashTable *conv_get_charset_from_str_table(void)
1002 {
1003         static GHashTable *table;
1004         gint i;
1005
1006         if (table)
1007                 return table;
1008
1009         table = g_hash_table_new(str_case_hash, str_case_equal);
1010
1011         for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1012                 g_hash_table_insert(table, charsets[i].name,
1013                                     GUINT_TO_POINTER(charsets[i].charset));
1014         }
1015
1016         return table;
1017 }
1018
1019 const gchar *conv_get_charset_str(CharSet charset)
1020 {
1021         GHashTable *table;
1022
1023         table = conv_get_charset_to_str_table();
1024         return g_hash_table_lookup(table, GUINT_TO_POINTER(charset));
1025 }
1026
1027 CharSet conv_get_charset_from_str(const gchar *charset)
1028 {
1029         GHashTable *table;
1030
1031         if (!charset) return C_AUTO;
1032
1033         table = conv_get_charset_from_str_table();
1034         return GPOINTER_TO_UINT(g_hash_table_lookup(table, charset));
1035 }
1036
1037 CharSet conv_get_current_charset(void)
1038 {
1039         static CharSet cur_charset = -1;
1040         const gchar *cur_locale;
1041         const gchar *p;
1042         gint i;
1043
1044         if (cur_charset != -1)
1045                 return cur_charset;
1046
1047         cur_locale = conv_get_current_locale();
1048         if (!cur_locale) {
1049                 cur_charset = C_US_ASCII;
1050                 return cur_charset;
1051         }
1052
1053         if (strcasestr(cur_locale, "UTF-8")) {
1054                 cur_charset = C_UTF_8;
1055                 return cur_charset;
1056         }
1057
1058         if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1059                 cur_charset = C_ISO_8859_15;
1060                 return cur_charset;
1061         }
1062
1063         for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1064                 const gchar *p;
1065
1066                 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1067                    "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1068                 if (!strncasecmp(cur_locale, locale_table[i].locale,
1069                                  strlen(locale_table[i].locale))) {
1070                         cur_charset = locale_table[i].charset;
1071                         return cur_charset;
1072                 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1073                          !strchr(p + 1, '.')) {
1074                         if (strlen(cur_locale) == 2 &&
1075                             !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1076                                 cur_charset = locale_table[i].charset;
1077                                 return cur_charset;
1078                         }
1079                 }
1080         }
1081
1082         cur_charset = C_AUTO;
1083         return cur_charset;
1084 }
1085
1086 const gchar *conv_get_current_charset_str(void)
1087 {
1088         static const gchar *codeset = NULL;
1089
1090         if (!codeset)
1091                 codeset = conv_get_charset_str(conv_get_current_charset());
1092
1093         return codeset ? codeset : CS_US_ASCII;
1094 }
1095
1096 CharSet conv_get_outgoing_charset(void)
1097 {
1098         static CharSet out_charset = -1;
1099         const gchar *cur_locale;
1100         const gchar *p;
1101         gint i;
1102
1103         if (out_charset != -1)
1104                 return out_charset;
1105
1106         cur_locale = conv_get_current_locale();
1107         if (!cur_locale) {
1108                 out_charset = C_AUTO;
1109                 return out_charset;
1110         }
1111
1112         if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1113                 out_charset = C_ISO_8859_15;
1114                 return out_charset;
1115         }
1116
1117         for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1118                 const gchar *p;
1119
1120                 if (!strncasecmp(cur_locale, locale_table[i].locale,
1121                                  strlen(locale_table[i].locale))) {
1122                         out_charset = locale_table[i].out_charset;
1123                         break;
1124                 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1125                          !strchr(p + 1, '.')) {
1126                         if (strlen(cur_locale) == 2 &&
1127                             !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1128                                 out_charset = locale_table[i].out_charset;
1129                                 break;
1130                         }
1131                 }
1132         }
1133
1134 #if !HAVE_ICONV
1135         /* encoding conversion without iconv() is only supported
1136            on Japanese locale for now */
1137         if (out_charset == C_ISO_2022_JP)
1138                 return out_charset;
1139         else
1140                 return conv_get_current_charset();
1141 #endif
1142
1143         return out_charset;
1144 }
1145
1146 const gchar *conv_get_outgoing_charset_str(void)
1147 {
1148         CharSet out_charset;
1149         const gchar *str;
1150
1151         if (prefs_common.outgoing_charset) {
1152                 if (!isalpha(prefs_common.outgoing_charset[0])) {
1153                         g_free(prefs_common.outgoing_charset);
1154                         prefs_common.outgoing_charset = g_strdup(CS_AUTO);
1155                 } else if (strcmp(prefs_common.outgoing_charset, CS_AUTO) != 0)
1156                         return prefs_common.outgoing_charset;
1157         }
1158
1159         out_charset = conv_get_outgoing_charset();
1160         str = conv_get_charset_str(out_charset);
1161
1162         return str ? str : CS_US_ASCII;
1163 }
1164
1165 const gchar *conv_get_current_locale(void)
1166 {
1167         gchar *cur_locale;
1168
1169         cur_locale = g_getenv("LC_ALL");
1170         if (!cur_locale) cur_locale = g_getenv("LC_CTYPE");
1171         if (!cur_locale) cur_locale = g_getenv("LANG");
1172         if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
1173
1174         debug_print("current locale: %s\n",
1175                     cur_locale ? cur_locale : "(none)");
1176
1177         return cur_locale;
1178 }
1179
1180 void conv_unmime_header_overwrite(gchar *str)
1181 {
1182         gchar *buf;
1183         gint buflen;
1184         CharSet cur_charset;
1185
1186         cur_charset = conv_get_current_charset();
1187
1188         if (cur_charset == C_EUC_JP) {
1189                 buflen = strlen(str) * 2 + 1;
1190                 Xalloca(buf, buflen, return);
1191                 conv_anytodisp(buf, buflen, str);
1192                 unmime_header(str, buf);
1193         } else {
1194                 buflen = strlen(str) + 1;
1195                 Xalloca(buf, buflen, return);
1196                 unmime_header(buf, str);
1197                 strncpy2(str, buf, buflen);
1198         }
1199 }
1200
1201 void conv_unmime_header(gchar *outbuf, gint outlen, const gchar *str,
1202                         const gchar *charset)
1203 {
1204         CharSet cur_charset;
1205
1206         cur_charset = conv_get_current_charset();
1207
1208         if (cur_charset == C_EUC_JP) {
1209                 gchar *buf;
1210                 gint buflen;
1211
1212                 buflen = strlen(str) * 2 + 1;
1213                 Xalloca(buf, buflen, return);
1214                 conv_anytodisp(buf, buflen, str);
1215                 unmime_header(outbuf, buf);
1216         } else
1217                 unmime_header(outbuf, str);
1218 }
1219
/* preferred maximum length of an output line when folding headers */
#define MAX_LINELEN             76
/* length at which a line is broken even in the middle of an ASCII word */
#define MAX_HARD_LINELEN        996
/* delimiters surrounding a MIME encoded-word: =?charset?enc?text?= */
#define MIMESEP_BEGIN           "=?"
#define MIMESEP_END             "?="

/* number of characters produced by Base64-encoding `len' bytes,
   padding included */
#define B64LEN(len)     ((len) / 3 * 4 + ((len) % 3 ? 4 : 0))
1226
/*
 * Helper for conv_encode_header(); it uses that function's locals
 * (dest, destp, len, srcp, left) directly.
 *
 * 1. If fewer than MAX_LINELEN + 2 bytes remain in dest, the output is
 *    NUL-terminated and the *enclosing function returns*.
 * 2. Otherwise, if `cond' holds and input remains, and something has
 *    already been written and the current line has been used
 *    (left < MAX_LINELEN - 1), a line break followed by one space is
 *    written and `left' is reset to MAX_LINELEN - 1.  A space at the end
 *    of the output is dropped first; failing that, when `is_plain_text'
 *    is set, a space at the current input position is skipped.
 */
#define LBREAK_IF_REQUIRED(cond, is_plain_text)                         \
{                                                                       \
        if (len - (destp - dest) < MAX_LINELEN + 2) {                   \
                *destp = '\0';                                          \
                return;                                                 \
        }                                                               \
                                                                        \
        if ((cond) && *srcp) {                                          \
                if (destp > dest && left < MAX_LINELEN - 1) {           \
                        if (isspace(*(destp - 1)))                      \
                                destp--;                                \
                        else if (is_plain_text && isspace(*srcp))       \
                                srcp++;                                 \
                        if (*srcp) {                                    \
                                *destp++ = '\n';                        \
                                *destp++ = ' ';                         \
                                left = MAX_LINELEN - 1;                 \
                        }                                               \
                }                                                       \
        }                                                               \
}

/*
 * Encode a header body for transmission, folding it into several lines.
 *
 * dest:       output buffer; always NUL-terminated on return
 * len:        size of dest in bytes; output stops early once fewer than
 *             MAX_LINELEN + 2 bytes remain (see LBREAK_IF_REQUIRED)
 * src:        header body to encode, in the current locale's charset
 * header_len: number of columns already used on the first line; the
 *             first line may hold MAX_LINELEN - header_len more characters
 *
 * Words for which is_next_nonascii() is false are copied unchanged.  Other
 * runs are converted from the current charset to the outgoing charset and
 * written as "=?charset?B?...?=" (Base64, when MB_CUR_MAX > 1) or
 * "=?charset?Q?...?=" (Q-encoding) blocks, each sized to fit into the
 * space left on the current line.
 */
void conv_encode_header(gchar *dest, gint len, const gchar *src,
                        gint header_len)
{
        const gchar *cur_encoding;
        const gchar *out_encoding;
        gint mimestr_len;
        gchar *mimesep_enc;
        gint left;
        const gchar *srcp = src;
        gchar *destp = dest;
        gboolean use_base64;

        /* Base64 in multibyte locales, Q-encoding otherwise */
        if (MB_CUR_MAX > 1) {
                use_base64 = TRUE;
                mimesep_enc = "?B?";
        } else {
                use_base64 = FALSE;
                mimesep_enc = "?Q?";
        }

        /* US-ASCII is replaced by ISO-8859-1 on both sides */
        cur_encoding = conv_get_current_charset_str();
        if (!strcmp(cur_encoding, CS_US_ASCII))
                cur_encoding = CS_ISO_8859_1;
        out_encoding = conv_get_outgoing_charset_str();
        if (!strcmp(out_encoding, CS_US_ASCII))
                out_encoding = CS_ISO_8859_1;

        /* fixed overhead of one encoded block: "=?" charset "?X?" "?=" */
        mimestr_len = strlen(MIMESEP_BEGIN) + strlen(out_encoding) +
                strlen(mimesep_enc) + strlen(MIMESEP_END);

        /* `left' counts the columns still available on the current line */
        left = MAX_LINELEN - header_len;

        while (*srcp) {
                LBREAK_IF_REQUIRED(left <= 0, TRUE);

                /* copy whitespace as it is */
                while (isspace(*srcp)) {
                        *destp++ = *srcp++;
                        left--;
                        LBREAK_IF_REQUIRED(left <= 0, TRUE);
                }

                /* output as it is if the next word is ASCII string */
                if (!is_next_nonascii(srcp)) {
                        gint word_len;

                        word_len = get_next_word_len(srcp);
                        LBREAK_IF_REQUIRED(left < word_len, TRUE);
                        while (word_len > 0) {
                                /* inside a word a break is only made once
                                   the line reaches MAX_HARD_LINELEN */
                                LBREAK_IF_REQUIRED(left + (MAX_HARD_LINELEN - MAX_LINELEN) <= 0, TRUE)
                                *destp++ = *srcp++;
                                left--;
                                word_len--;
                        }

                        continue;
                }

                /* emit one encoded block per iteration until the run that
                   needs encoding has been consumed */
                while (1) {
                        gint mb_len = 0;
                        gint cur_len = 0;
                        gchar *part_str;
                        gchar *out_str;
                        gchar *enc_str;
                        const gchar *p = srcp;
                        gint out_str_len;
                        gint out_enc_str_len;
                        gint mime_block_len;
                        gboolean cont = FALSE;

                        /* grow the candidate chunk [srcp, srcp + cur_len)
                           one character at a time while its encoded form
                           still fits into the current line */
                        while (*p != '\0') {
                                /* stop at whitespace that is followed by
                                   an ASCII word */
                                if (isspace(*p) && !is_next_nonascii(p + 1))
                                        break;

                                if (MB_CUR_MAX > 1) {
                                        mb_len = mblen(p, MB_CUR_MAX);
                                        if (mb_len < 0) {
                                                g_warning("conv_encode_header(): invalid multibyte character encountered\n");
                                                mb_len = 1;
                                        }
                                } else
                                        mb_len = 1;

                                /* trial conversion, used only to measure
                                   the encoded length.
                                   NOTE(review): Xstrndup_a and Xalloca are
                                   project macros, presumably alloca()
                                   based, in which case these per-iteration
                                   copies are only released when the
                                   function returns -- confirm */
                                Xstrndup_a(part_str, srcp, cur_len + mb_len, );
                                out_str = conv_codeset_strdup
                                        (part_str, cur_encoding, out_encoding);
                                if (!out_str) {
                                        g_warning("conv_encode_header(): code conversion failed\n");
                                        conv_unreadable_8bit(part_str);
                                        out_str = g_strdup(part_str);
                                }
                                out_str_len = strlen(out_str);

                                if (use_base64)
                                        out_enc_str_len = B64LEN(out_str_len);
                                else
                                        out_enc_str_len =
                                                qp_get_q_encoding_len(out_str);

                                g_free(out_str);

                                if (mimestr_len + out_enc_str_len <= left) {
                                        /* still fits: accept the character */
                                        cur_len += mb_len;
                                        p += mb_len;
                                } else if (cur_len == 0) {
                                        /* not even one character fits:
                                           break the line and retry.
                                           NOTE(review): when the macro
                                           makes no break (destp == dest or
                                           left >= MAX_LINELEN - 1) nothing
                                           changes before the retry, which
                                           looks like it could loop
                                           forever -- confirm */
                                        LBREAK_IF_REQUIRED(1, FALSE);
                                        continue;
                                } else {
                                        /* line is full: emit what has been
                                           collected, continue on the next
                                           line */
                                        cont = TRUE;
                                        break;
                                }
                        }

                        if (cur_len > 0) {
                                /* convert and encode the accepted chunk */
                                Xstrndup_a(part_str, srcp, cur_len, );
                                out_str = conv_codeset_strdup
                                        (part_str, cur_encoding, out_encoding);
                                if (!out_str) {
                                        g_warning("conv_encode_header(): code conversion failed\n");
                                        conv_unreadable_8bit(part_str);
                                        out_str = g_strdup(part_str);
                                }
                                out_str_len = strlen(out_str);

                                if (use_base64)
                                        out_enc_str_len = B64LEN(out_str_len);
                                else
                                        out_enc_str_len =
                                                qp_get_q_encoding_len(out_str);

                                Xalloca(enc_str, out_enc_str_len + 1, );
                                if (use_base64)
                                        base64_encode(enc_str, out_str, out_str_len);
                                else
                                        qp_q_encode(enc_str, out_str);

                                g_free(out_str);

                                /* output MIME-encoded string block */
                                mime_block_len = mimestr_len + strlen(enc_str);
                                g_snprintf(destp, mime_block_len + 1,
                                           MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
                                           out_encoding, mimesep_enc, enc_str);
                                destp += mime_block_len;
                                srcp += cur_len;

                                left -= mime_block_len;
                        }

                        LBREAK_IF_REQUIRED(cont, FALSE);

                        /* nothing was collected: the run has ended */
                        if (cur_len == 0)
                                break;
                }
        }

        *destp = '\0';
}

#undef LBREAK_IF_REQUIRED