src/codeconv.c

   1 /*
   2  * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
   3  * Copyright (C) 1999-2003 Hiroyuki Yamamoto
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 2 of the License, or
   8  * (at your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License
  16  * along with this program; if not, write to the Free Software
  17  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  18  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 #  include "config.h"
  22 #endif
  23
  24 #include <glib.h>
  25 #include <string.h>
  26 #include <ctype.h>
  27 #include <stdlib.h>
  28 #include <errno.h>
  29
  30 #if HAVE_LOCALE_H
  31 #  include <locale.h>
  32 #endif
  33
  34 #if HAVE_ICONV
  35 #  include <iconv.h>
  36 #endif
  37
  38 #include "intl.h"
  39 #include "codeconv.h"
  40 #include "unmime.h"
  41 #include "base64.h"
  42 #include "quoted-printable.h"
  43 #include "utils.h"
  44 #include "prefs_common.h"
  45
  46 typedef enum
  47 {
  48         JIS_ASCII,
  49         JIS_KANJI,
  50         JIS_HWKANA,
  51         JIS_AUXKANJI
  52 } JISState;
  53
  54 #define SUBST_CHAR      '_'
  55 #define ESC             '\033'
  56
  57 #define iseuckanji(c) \
  58         (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
  59 #define iseuchwkana1(c) \
  60         (((c) & 0xff) == 0x8e)
  61 #define iseuchwkana2(c) \
  62         (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
  63 #define iseucaux(c) \
  64         (((c) & 0xff) == 0x8f)
  65 #define issjiskanji1(c) \
  66         ((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
  67          (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xfc))
  68 #define issjiskanji2(c) \
  69         ((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) || \
  70          (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
  71 #define issjishwkana(c) \
  72         (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
  73
  74 #define K_IN()                          \
  75         if (state != JIS_KANJI) {       \
  76                 *out++ = ESC;           \
  77                 *out++ = '$';           \
  78                 *out++ = 'B';           \
  79                 state = JIS_KANJI;      \
  80         }
  81
  82 #define K_OUT()                         \
  83         if (state != JIS_ASCII) {       \
  84                 *out++ = ESC;           \
  85                 *out++ = '(';           \
  86                 *out++ = 'B';           \
  87                 state = JIS_ASCII;      \
  88         }
  89
  90 #define HW_IN()                         \
  91         if (state != JIS_HWKANA) {      \
  92                 *out++ = ESC;           \
  93                 *out++ = '(';           \
  94                 *out++ = 'I';           \
  95                 state = JIS_HWKANA;     \
  96         }
  97
  98 #define AUX_IN()                        \
  99         if (state != JIS_AUXKANJI) {    \
 100                 *out++ = ESC;           \
 101                 *out++ = '$';           \
 102                 *out++ = '(';           \
 103                 *out++ = 'D';           \
 104                 state = JIS_AUXKANJI;   \
 105         }
 106
 107 void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
 108 {
 109         const guchar *in = inbuf;
 110         guchar *out = outbuf;
 111         JISState state = JIS_ASCII;
 112
 113         while (*in != '\0') {
 114                 if (*in == ESC) {
 115                         in++;
 116                         if (*in == '$') {
 117                                 if (*(in + 1) == '@' || *(in + 1) == 'B') {
 118                                         state = JIS_KANJI;
 119                                         in += 2;
 120                                 } else if (*(in + 1) == '(' &&
 121                                            *(in + 2) == 'D') {
 122                                         state = JIS_AUXKANJI;
 123                                         in += 3;
 124                                 } else {
 125                                         /* unknown escape sequence */
 126                                         state = JIS_ASCII;
 127                                 }
 128                         } else if (*in == '(') {
 129                                 if (*(in + 1) == 'B' || *(in + 1) == 'J') {
 130                                         state = JIS_ASCII;
 131                                         in += 2;
 132                                 } else if (*(in + 1) == 'I') {
 133                                         state = JIS_HWKANA;
 134                                         in += 2;
 135                                 } else {
 136                                         /* unknown escape sequence */
 137                                         state = JIS_ASCII;
 138                                 }
 139                         } else {
 140                                 /* unknown escape sequence */
 141                                 state = JIS_ASCII;
 142                         }
 143                 } else if (*in == 0x0e) {
 144                         state = JIS_HWKANA;
 145                         in++;
 146                 } else if (*in == 0x0f) {
 147                         state = JIS_ASCII;
 148                         in++;
 149                 } else {
 150                         switch (state) {
 151                         case JIS_ASCII:
 152                                 *out++ = *in++;
 153                                 break;
 154                         case JIS_KANJI:
 155                                 *out++ = *in++ | 0x80;
 156                                 if (*in == '\0') break;
 157                                 *out++ = *in++ | 0x80;
 158                                 break;
 159                         case JIS_HWKANA:
 160                                 *out++ = 0x8e;
 161                                 *out++ = *in++ | 0x80;
 162                                 break;
 163                         case JIS_AUXKANJI:
 164                                 *out++ = 0x8f;
 165                                 *out++ = *in++ | 0x80;
 166                                 if (*in == '\0') break;
 167                                 *out++ = *in++ | 0x80;
 168                                 break;
 169                         }
 170                 }
 171         }
 172
 173         *out = '\0';
 174 }
 175
 176 void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
 177 {
 178         const guchar *in = inbuf;
 179         guchar *out = outbuf;
 180         JISState state = JIS_ASCII;
 181
 182         while (*in != '\0') {
 183                 if (isascii(*in)) {
 184                         K_OUT();
 185                         *out++ = *in++;
 186                 } else if (iseuckanji(*in)) {
 187                         if (iseuckanji(*(in + 1))) {
 188                                 K_IN();
 189                                 *out++ = *in++ & 0x7f;
 190                                 *out++ = *in++ & 0x7f;
 191                         } else {
 192                                 K_OUT();
 193                                 *out++ = SUBST_CHAR;
 194                                 in++;
 195                                 if (*in != '\0' && !isascii(*in)) {
 196                                         *out++ = SUBST_CHAR;
 197                                         in++;
 198                                 }
 199                         }
 200                 } else if (iseuchwkana1(*in)) {
 201                         in++;
 202                         if (iseuchwkana2(*in)) {
 203                                 HW_IN();
 204                                 *out++ = *in++ & 0x7f;
 205                         } else {
 206                                 K_OUT();
 207                                 if (*in != '\0' && !isascii(*in)) {
 208                                         *out++ = SUBST_CHAR;
 209                                         in++;
 210                                 }
 211                         }
 212                 } else if (iseucaux(*in)) {
 213                         in++;
 214                         if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
 215                                 AUX_IN();
 216                                 *out++ = *in++ & 0x7f;
 217                                 *out++ = *in++ & 0x7f;
 218                         } else {
 219                                 K_OUT();
 220                                 if (*in != '\0' && !isascii(*in)) {
 221                                         *out++ = SUBST_CHAR;
 222                                         in++;
 223                                         if (*in != '\0' && !isascii(*in)) {
 224                                                 *out++ = SUBST_CHAR;
 225                                                 in++;
 226                                         }
 227                                 }
 228                         }
 229                 } else {
 230                         K_OUT();
 231                         *out++ = SUBST_CHAR;
 232                         in++;
 233                 }
 234         }
 235
 236         K_OUT();
 237         *out = '\0';
 238 }
 239
 240 void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
 241 {
 242         const guchar *in = inbuf;
 243         guchar *out = outbuf;
 244
 245         while (*in != '\0') {
 246                 if (isascii(*in)) {
 247                         *out++ = *in++;
 248                 } else if (issjiskanji1(*in)) {
 249                         if (issjiskanji2(*(in + 1))) {
 250                                 guchar out1 = *in;
 251                                 guchar out2 = *(in + 1);
 252                                 guchar row;
 253
 254                                 row = out1 < 0xa0 ? 0x70 : 0xb0;
 255                                 if (out2 < 0x9f) {
 256                                         out1 = (out1 - row) * 2 - 1;
 257                                         out2 -= out2 > 0x7f ? 0x20 : 0x1f;
 258                                 } else {
 259                                         out1 = (out1 - row) * 2;
 260                                         out2 -= 0x7e;
 261                                 }
 262
 263                                 *out++ = out1 | 0x80;
 264                                 *out++ = out2 | 0x80;
 265                                 in += 2;
 266                         } else {
 267                                 *out++ = SUBST_CHAR;
 268                                 in++;
 269                                 if (*in != '\0' && !isascii(*in)) {
 270                                         *out++ = SUBST_CHAR;
 271                                         in++;
 272                                 }
 273                         }
 274                 } else if (issjishwkana(*in)) {
 275                         *out++ = 0x8e;
 276                         *out++ = *in++;
 277                 } else {
 278                         *out++ = SUBST_CHAR;
 279                         in++;
 280                 }
 281         }
 282
 283         *out = '\0';
 284 }
 285
 286 void conv_anytoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
 287 {
 288         switch (conv_guess_ja_encoding(inbuf)) {
 289         case C_ISO_2022_JP:
 290                 conv_jistoeuc(outbuf, outlen, inbuf);
 291                 break;
 292         case C_SHIFT_JIS:
 293                 conv_sjistoeuc(outbuf, outlen, inbuf);
 294                 break;
 295         default:
 296                 strncpy2(outbuf, inbuf, outlen);
 297                 break;
 298         }
 299 }
 300
 301 void conv_anytojis(gchar *outbuf, gint outlen, const gchar *inbuf)
 302 {
 303         switch (conv_guess_ja_encoding(inbuf)) {
 304         case C_EUC_JP:
 305                 conv_euctojis(outbuf, outlen, inbuf);
 306                 break;
 307         default:
 308                 strncpy2(outbuf, inbuf, outlen);
 309                 break;
 310         }
 311 }
 312
  313 static gchar valid_eucjp_tbl[][96] = {
              /*
               * Printability map consulted by isprintableeuckanji() for
               * EUC-JP lead bytes 0xa2..0xa8: one row per lead byte (row
               * index is lead - 0xa2), one column per trail byte 0xa0..0xff
               * (column index is trail - 0xa0).  A non-zero entry means the
               * two-byte code is treated as printable.
               */
  314         /* 0xa2a0 - 0xa2ff */
  315         { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 0,
  316           0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 1, 1, 1, 1, 1, 1,
  317           1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 1, 1, 1, 1, 1, 1,
  318           1, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 1, 1, 1, 1,
  319           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0,
  320           0, 0, 1, 1, 1, 1, 1, 1,  1, 1, 0, 0, 0, 0, 1, 0 },
  321
  322         /* 0xa3a0 - 0xa3ff */
  323         { 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
  324           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 0, 0, 0, 0, 0, 0,
  325           0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  326           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0,
  327           0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  328           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0 },
  329
  330         /* 0xa4a0 - 0xa4ff */
  331         { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  332           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  333           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  334           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  335           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  336           1, 1, 1, 1, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },
  337
  338         /* 0xa5a0 - 0xa5ff */
  339         { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  340           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  341           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  342           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  343           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  344           1, 1, 1, 1, 1, 1, 1, 0,  0, 0, 0, 0, 0, 0, 0, 0 },
  345
  346         /* 0xa6a0 - 0xa6ff */
  347         { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  348           1, 1, 1, 1, 1, 1, 1, 1,  1, 0, 0, 0, 0, 0, 0, 0,
  349           0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  350           1, 1, 1, 1, 1, 1, 1, 1,  1, 0, 0, 0, 0, 0, 0, 0,
  351           0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
  352           0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },
  353
  354         /* 0xa7a0 - 0xa7ff */
  355         { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  356           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  357           1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
  358           0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  359           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  360           1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },
  361
  362         /* 0xa8a0 - 0xa8ff */
  363         { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  364           1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
  365           1, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
  366           0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
  367           0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
  368           0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 }
  369 };
 370
 371 static gboolean isprintableeuckanji(guchar c1, guchar c2)
 372 {
 373         if (c1 <= 0xa0 || c1 >= 0xf5)
 374                 return FALSE;
 375         if (c2 <= 0xa0 || c2 == 0xff)
 376                 return FALSE;
 377
 378         if (c1 >= 0xa9 && c1 <= 0xaf)
 379                 return FALSE;
 380
 381         if (c1 >= 0xa2 && c1 <= 0xa8)
 382                 return (gboolean)valid_eucjp_tbl[c1 - 0xa2][c2 - 0xa0];
 383
 384         if (c1 == 0xcf) {
 385                 if (c2 >= 0xd4 && c2 <= 0xff)
 386                         return FALSE;
 387         } else if (c1 == 0xf4) {
 388                 if (c2 >= 0xa7 && c2 <= 0xff)
 389                         return FALSE;
 390         }
 391
 392         return TRUE;
 393 }
 394
 395 void conv_unreadable_eucjp(gchar *str)
 396 {
 397         register guchar *p = str;
 398
 399         while (*p != '\0') {
 400                 if (isascii(*p)) {
 401                         /* convert CR+LF -> LF */
 402                         if (*p == '\r' && *(p + 1) == '\n')
 403                                 memmove(p, p + 1, strlen(p));
 404                         /* printable 7 bit code */
 405                         p++;
 406                 } else if (iseuckanji(*p)) {
 407                         if (isprintableeuckanji(*p, *(p + 1))) {
 408                                 /* printable euc-jp code */
 409                                 p += 2;
 410                         } else {
 411                                 /* substitute unprintable code */
 412                                 *p++ = SUBST_CHAR;
 413                                 if (*p != '\0') {
 414                                         if (isascii(*p))
 415                                                 p++;
 416                                         else
 417                                                 *p++ = SUBST_CHAR;
 418                                 }
 419                         }
 420                 } else if (iseuchwkana1(*p)) {
 421                         if (iseuchwkana2(*(p + 1)))
 422                                 /* euc-jp hankaku kana */
 423                                 p += 2;
 424                         else
 425                                 *p++ = SUBST_CHAR;
 426                 } else if (iseucaux(*p)) {
 427                         if (iseuckanji(*(p + 1)) && iseuckanji(*(p + 2))) {
 428                                 /* auxiliary kanji */
 429                                 p += 3;
 430                         } else
 431                                 *p++ = SUBST_CHAR;
 432                 } else
 433                         /* substitute unprintable 1 byte code */
 434                         *p++ = SUBST_CHAR;
 435         }
 436 }
 437
 438 void conv_unreadable_8bit(gchar *str)
 439 {
 440         register guchar *p = str;
 441
 442         while (*p != '\0') {
 443                 /* convert CR+LF -> LF */
 444                 if (*p == '\r' && *(p + 1) == '\n')
 445                         memmove(p, p + 1, strlen(p));
 446                 else if (!isascii(*p)) *p = SUBST_CHAR;
 447                 p++;
 448         }
 449 }
 450
 451 void conv_unreadable_latin(gchar *str)
 452 {
 453         register guchar *p = str;
 454
 455         while (*p != '\0') {
 456                 /* convert CR+LF -> LF */
 457                 if (*p == '\r' && *(p + 1) == '\n')
 458                         memmove(p, p + 1, strlen(p));
 459                 else if ((*p & 0xff) >= 0x7f && (*p & 0xff) <= 0x9f)
 460                         *p = SUBST_CHAR;
 461                 p++;
 462         }
 463 }
 464
 465 void conv_unreadable_locale(gchar *str)
 466 {
 467         switch (conv_get_current_charset()) {
 468         case C_US_ASCII:
 469         case C_ISO_8859_1:
 470         case C_ISO_8859_2:
 471         case C_ISO_8859_3:
 472         case C_ISO_8859_4:
 473         case C_ISO_8859_5:
 474         case C_ISO_8859_6:
 475         case C_ISO_8859_7:
 476         case C_ISO_8859_8:
 477         case C_ISO_8859_9:
 478         case C_ISO_8859_10:
 479         case C_ISO_8859_11:
 480         case C_ISO_8859_13:
 481         case C_ISO_8859_14:
 482         case C_ISO_8859_15:
 483                 conv_unreadable_latin(str);
 484                 break;
 485         case C_EUC_JP:
 486                 conv_unreadable_eucjp(str);
 487                 break;
 488         default:
 489                 break;
 490         }
 491 }
 492
 493 #define NCV     '\0'
 494
 495 void conv_mb_alnum(gchar *str)
 496 {
 497         static guchar char_tbl[] = {
 498                 /* 0xa0 - 0xaf */
 499                 NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
 500                 ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
 501                 /* 0xb0 - 0xbf */
 502                 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
 503                 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
 504                 /* 0xc0 - 0xcf */
 505                 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
 506                 NCV, NCV, '(', ')', NCV, NCV, '[', ']',
 507                 /* 0xd0 - 0xdf */
 508                 '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
 509                 NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
 510                 /* 0xe0 - 0xef */
 511                 NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
 512                 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
 513         };
 514
 515         register guchar *p = str;
 516         register gint len;
 517
 518         len = strlen(str);
 519
 520         while (len > 1) {
 521                 if (*p == 0xa3) {
 522                         register guchar ch = *(p + 1);
 523
 524                         if (ch >= 0xb0 && ch <= 0xfa) {
 525                                 /* [a-zA-Z] */
 526                                 *p = ch & 0x7f;
 527                                 p++;
 528                                 len--;
 529                                 memmove(p, p + 1, len);
 530                                 len--;
 531                         } else  {
 532                                 p += 2;
 533                                 len -= 2;
 534                         }
 535                 } else if (*p == 0xa1) {
 536                         register guchar ch = *(p + 1);
 537
 538                         if (ch >= 0xa0 && ch <= 0xef &&
 539                             NCV != char_tbl[ch - 0xa0]) {
 540                                 *p = char_tbl[ch - 0xa0];
 541                                 p++;
 542                                 len--;
 543                                 memmove(p, p + 1, len);
 544                                 len--;
 545                         } else {
 546                                 p += 2;
 547                                 len -= 2;
 548                         }
 549                 } else if (iseuckanji(*p)) {
 550                         p += 2;
 551                         len -= 2;
 552                 } else {
 553                         p++;
 554                         len--;
 555                 }
 556         }
 557 }
 558
 559 CharSet conv_guess_ja_encoding(const gchar *str)
 560 {
 561         const guchar *p = str;
 562         CharSet guessed = C_US_ASCII;
 563
 564         while (*p != '\0') {
 565                 if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
 566                         if (guessed == C_US_ASCII)
 567                                 return C_ISO_2022_JP;
 568                         p += 2;
 569                 } else if (isascii(*p)) {
 570                         p++;
 571                 } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
 572                         if (*p >= 0xfd && *p <= 0xfe)
 573                                 return C_EUC_JP;
 574                         else if (guessed == C_SHIFT_JIS) {
 575                                 if ((issjiskanji1(*p) &&
 576                                      issjiskanji2(*(p + 1))) ||
 577                                     issjishwkana(*p))
 578                                         guessed = C_SHIFT_JIS;
 579                                 else
 580                                         guessed = C_EUC_JP;
 581                         } else
 582                                 guessed = C_EUC_JP;
 583                         p += 2;
 584                 } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
 585                         if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
 586                                 guessed = C_SHIFT_JIS;
 587                         else
 588                                 return C_SHIFT_JIS;
 589                         p += 2;
 590                 } else if (issjishwkana(*p)) {
 591                         guessed = C_SHIFT_JIS;
 592                         p++;
 593                 } else {
 594                         p++;
 595                 }
 596         }
 597
 598         return guessed;
 599 }
 600
  601 void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
  602 {
              /*
               * Display conversion for ISO-2022-JP input: convert inbuf to
               * EUC-JP in outbuf, then sanitise outbuf in place (CR+LF -> LF,
               * unprintable codes -> SUBST_CHAR).
               */
  603         conv_jistoeuc(outbuf, outlen, inbuf);
  604         conv_unreadable_eucjp(outbuf);
  605 }
 606
  607 void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
  608 {
              /*
               * Display conversion for Shift_JIS input: convert inbuf to
               * EUC-JP in outbuf, then sanitise outbuf in place (CR+LF -> LF,
               * unprintable codes -> SUBST_CHAR).
               */
  609         conv_sjistoeuc(outbuf, outlen, inbuf);
  610         conv_unreadable_eucjp(outbuf);
  611 }
 612
  613 void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
  614 {
              /*
               * Display conversion for EUC-JP input: copy inbuf to outbuf
               * without re-encoding, then sanitise outbuf in place.
               * NOTE(review): strncpy2() is defined outside this file;
               * presumably a bounded copy that always NUL-terminates --
               * confirm.
               */
  615         strncpy2(outbuf, inbuf, outlen);
  616         conv_unreadable_eucjp(outbuf);
  617 }
 618
  619 void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
  620 {
              /*
               * Display conversion for Japanese input of unknown encoding:
               * conv_anytoeuc() guesses the encoding and converts to EUC-JP,
               * then outbuf is sanitised in place.
               */
  621         conv_anytoeuc(outbuf, outlen, inbuf);
  622         conv_unreadable_eucjp(outbuf);
  623 }
 624
  625 void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
  626 {
              /*
               * Display conversion for US-ASCII: copy inbuf to outbuf, then
               * replace every non-ASCII byte with SUBST_CHAR and turn CR+LF
               * into LF, in place.
               */
  627         strncpy2(outbuf, inbuf, outlen);
  628         conv_unreadable_8bit(outbuf);
  629 }
 630
  631 void conv_latintodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
  632 {
              /*
               * Display conversion for ISO-8859-x text: copy inbuf to outbuf,
               * then replace bytes 0x7f..0x9f with SUBST_CHAR and turn CR+LF
               * into LF, in place.
               */
  633         strncpy2(outbuf, inbuf, outlen);
  634         conv_unreadable_latin(outbuf);
  635 }
 636
  637 void conv_localetodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
  638 {
              /*
               * Display conversion for text in the current charset: copy
               * inbuf to outbuf, then let conv_unreadable_locale() pick the
               * in-place sanitiser that matches the current charset.
               */
  639         strncpy2(outbuf, inbuf, outlen);
  640         conv_unreadable_locale(outbuf);
  641 }
 642
  643 void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
  644 {
              /*
               * Identity "conversion": just copy inbuf to outbuf.  The address
               * of this function also serves as the "no built-in converter"
               * marker that conv_convert() and conv_codeset_strdup() compare
               * against before deciding how to convert.
               */
  645         strncpy2(outbuf, inbuf, outlen);
  646 }
 647
 648 CodeConverter *conv_code_converter_new(const gchar *charset)
 649 {
 650         CodeConverter *conv;
 651
 652         conv = g_new0(CodeConverter, 1);
 653         conv->code_conv_func = conv_get_code_conv_func(charset, NULL);
 654         conv->charset_str = g_strdup(charset);
 655         conv->charset = conv_get_charset_from_str(charset);
 656
 657         return conv;
 658 }
 659
 660 void conv_code_converter_destroy(CodeConverter *conv)
 661 {
 662         g_free(conv->charset_str);
 663         g_free(conv);
 664 }
 665
 666 gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
 667                   const gchar *inbuf)
 668 {
 669 #if HAVE_ICONV
 670         if (conv->code_conv_func != conv_noconv)
 671                 conv->code_conv_func(outbuf, outlen, inbuf);
 672         else {
 673                 gchar *str;
 674
 675                 str = conv_iconv_strdup(inbuf, conv->charset_str, NULL);
 676                 if (!str)
 677                         return -1;
 678                 else {
 679                         strncpy2(outbuf, str, outlen);
 680                         g_free(str);
 681                 }
 682         }
 683 #else /* !HAVE_ICONV */
 684         conv->code_conv_func(outbuf, outlen, inbuf);
 685 #endif
 686
 687         return 0;
 688 }
 689
 690 gchar *conv_codeset_strdup(const gchar *inbuf,
 691                            const gchar *src_code, const gchar *dest_code)
 692 {
 693         gchar *buf;
 694         size_t len;
 695         CodeConvFunc conv_func;
 696
 697         conv_func = conv_get_code_conv_func(src_code, dest_code);
 698         if (conv_func != conv_noconv) {
 699                 len = (strlen(inbuf) + 1) * 3;
 700                 buf = g_malloc(len);
 701                 if (!buf) return NULL;
 702
 703                 conv_func(buf, len, inbuf);
 704                 return g_realloc(buf, strlen(buf) + 1);
 705         }
 706
 707 #if HAVE_ICONV
 708         return conv_iconv_strdup(inbuf, src_code, dest_code);
 709 #else
 710         return g_strdup(inbuf);
 711 #endif /* HAVE_ICONV */
 712 }
 713
 714 CodeConvFunc conv_get_code_conv_func(const gchar *src_charset_str,
 715                                      const gchar *dest_charset_str)
 716 {
 717         CodeConvFunc code_conv = conv_noconv;
 718         CharSet src_charset;
 719         CharSet dest_charset;
 720
 721         if (!src_charset_str)
 722                 src_charset = conv_get_current_charset();
 723         else
 724                 src_charset = conv_get_charset_from_str(src_charset_str);
 725
 726         /* auto detection mode */
 727         if (!src_charset_str && !dest_charset_str) {
 728                 if (src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS)
 729                         return conv_anytodisp;
 730                 else
 731                         return conv_noconv;
 732         }
 733
 734         dest_charset = conv_get_charset_from_str(dest_charset_str);
 735
 736         if (dest_charset == C_US_ASCII)
 737                 return conv_ustodisp;
 738         else if (dest_charset == C_UTF_8 ||
 739                  (dest_charset == C_AUTO &&
 740                   conv_get_current_charset() == C_UTF_8))
 741                 return conv_noconv;
 742
 743         switch (src_charset) {
 744         case C_ISO_2022_JP:
 745         case C_ISO_2022_JP_2:
 746                 if (dest_charset == C_AUTO)
 747                         code_conv = conv_jistodisp;
 748                 else if (dest_charset == C_EUC_JP)
 749                         code_conv = conv_jistoeuc;
 750                 break;
 751         case C_US_ASCII:
 752                 if (dest_charset == C_AUTO)
 753                         code_conv = conv_ustodisp;
 754                 break;
 755         case C_ISO_8859_1:
 756         case C_ISO_8859_2:
 757         case C_ISO_8859_3:
 758         case C_ISO_8859_4:
 759         case C_ISO_8859_5:
 760         case C_ISO_8859_6:
 761         case C_ISO_8859_7:
 762         case C_ISO_8859_8:
 763         case C_ISO_8859_9:
 764         case C_ISO_8859_10:
 765         case C_ISO_8859_11:
 766         case C_ISO_8859_13:
 767         case C_ISO_8859_14:
 768         case C_ISO_8859_15:
 769                 if (dest_charset == C_AUTO)
 770                         code_conv = conv_latintodisp;
 771                 break;
 772         case C_SHIFT_JIS:
 773                 if (dest_charset == C_AUTO)
 774                         code_conv = conv_sjistodisp;
 775                 else if (dest_charset == C_EUC_JP)
 776                         code_conv = conv_sjistoeuc;
 777                 break;
 778         case C_EUC_JP:
 779                 if (dest_charset == C_AUTO)
 780                         code_conv = conv_euctodisp;
 781                 else if (dest_charset == C_ISO_2022_JP ||
 782                          dest_charset == C_ISO_2022_JP_2)
 783                         code_conv = conv_euctojis;
 784                 break;
 785         default:
 786                 break;
 787         }
 788
 789         return code_conv;
 790 }
 791
 792 #if HAVE_ICONV
 793 gchar *conv_iconv_strdup(const gchar *inbuf,
 794                          const gchar *src_code, const gchar *dest_code)
 795 {
 796         iconv_t cd;
 797         const gchar *inbuf_p;
 798         gchar *outbuf;
 799         gchar *outbuf_p;
 800         gint in_size;
 801         gint in_left;
 802         gint out_size;
 803         gint out_left;
 804         gint n_conv;
 805         gint len;
 806
 807         if (!src_code)
 808                 src_code = conv_get_outgoing_charset_str();
 809         if (!dest_code)
 810                 dest_code = conv_get_current_charset_str();
 811
 812         /* don't convert if current codeset is US-ASCII */
 813         if (!strcasecmp(dest_code, CS_US_ASCII))
 814                 return g_strdup(inbuf);
 815
 816         /* don't convert if src and dest codeset are identical */
 817         if (!strcasecmp(src_code, dest_code))
 818                 return g_strdup(inbuf);
 819
 820         cd = iconv_open(dest_code, src_code);
 821         if (cd == (iconv_t)-1)
 822                 return NULL;
 823
 824         inbuf_p = inbuf;
 825         in_size = strlen(inbuf);
 826         in_left = in_size;
 827         out_size = (in_size + 1) * 2;
 828         outbuf = g_malloc(out_size);
 829         outbuf_p = outbuf;
 830         out_left = out_size;
 831
 832 #define EXPAND_BUF()                            \
 833 {                                               \
 834         len = outbuf_p - outbuf;                \
 835         out_size *= 2;                          \
 836         outbuf = g_realloc(outbuf, out_size);   \
 837         outbuf_p = outbuf + len;                \
 838         out_left = out_size - len;              \
 839 }
 840
 841         while ((n_conv = iconv(cd, (ICONV_CONST gchar **)&inbuf_p, &in_left,
 842                                &outbuf_p, &out_left)) < 0) {
 843                 if (EILSEQ == errno) {
 844                         inbuf_p++;
 845                         in_left--;
 846                         if (out_left == 0) {
 847                                 EXPAND_BUF();
 848                         }
 849                         *outbuf_p++ = SUBST_CHAR;
 850                         out_left--;
 851                 } else if (EINVAL == errno) {
 852                         break;
 853                 } else if (E2BIG == errno) {
 854                         EXPAND_BUF();
 855                 } else {
 856                         g_warning("conv_iconv_strdup(): %s\n",
 857                                   g_strerror(errno));
 858                         break;
 859                 }
 860         }
 861
 862         while ((n_conv = iconv(cd, NULL, NULL, &outbuf_p, &out_left)) < 0) {
 863                 if (E2BIG == errno) {
 864                         EXPAND_BUF();
 865                 } else {
 866                         g_warning("conv_iconv_strdup(): %s\n",
 867                                   g_strerror(errno));
 868                         break;
 869                 }
 870         }
 871
 872 #undef EXPAND_BUF
 873
 874         len = outbuf_p - outbuf;
 875         outbuf = g_realloc(outbuf, len + 1);
 876         outbuf[len] = '\0';
 877
 878         iconv_close(cd);
 879
 880         return outbuf;
 881 }
 882 #endif /* HAVE_ICONV */
 883
/*
 * Table pairing each CharSet value with a charset name string.
 *
 * A CharSet may appear several times, once per accepted name.
 * conv_get_charset_from_str_table() registers every name, while
 * conv_get_charset_to_str_table() keeps only the first name listed for
 * a given CharSet, so the first entry of each CharSet is the name
 * returned by conv_get_charset_str().
 */
static const struct {
        CharSet charset;
        gchar *const name;
} charsets[] = {
        {C_US_ASCII,            CS_US_ASCII},
        {C_US_ASCII,            CS_ANSI_X3_4_1968},
        {C_UTF_8,               CS_UTF_8},
        {C_UTF_7,               CS_UTF_7},
        {C_ISO_8859_1,          CS_ISO_8859_1},
        {C_ISO_8859_2,          CS_ISO_8859_2},
        {C_ISO_8859_3,          CS_ISO_8859_3},
        {C_ISO_8859_4,          CS_ISO_8859_4},
        {C_ISO_8859_5,          CS_ISO_8859_5},
        {C_ISO_8859_6,          CS_ISO_8859_6},
        {C_ISO_8859_7,          CS_ISO_8859_7},
        {C_ISO_8859_8,          CS_ISO_8859_8},
        {C_ISO_8859_9,          CS_ISO_8859_9},
        {C_ISO_8859_10,         CS_ISO_8859_10},
        {C_ISO_8859_11,         CS_ISO_8859_11},
        {C_ISO_8859_13,         CS_ISO_8859_13},
        {C_ISO_8859_14,         CS_ISO_8859_14},
        {C_ISO_8859_15,         CS_ISO_8859_15},
        {C_BALTIC,              CS_BALTIC},
        {C_CP1250,              CS_CP1250},
        {C_CP1251,              CS_CP1251},
        {C_CP1252,              CS_CP1252},
        {C_CP1253,              CS_CP1253},
        {C_CP1254,              CS_CP1254},
        {C_CP1255,              CS_CP1255},
        {C_CP1256,              CS_CP1256},
        {C_CP1257,              CS_CP1257},
        {C_CP1258,              CS_CP1258},
        {C_WINDOWS_1250,        CS_WINDOWS_1250},
        {C_WINDOWS_1251,        CS_WINDOWS_1251},
        {C_WINDOWS_1252,        CS_WINDOWS_1252},
        {C_WINDOWS_1253,        CS_WINDOWS_1253},
        {C_WINDOWS_1254,        CS_WINDOWS_1254},
        {C_WINDOWS_1255,        CS_WINDOWS_1255},
        {C_WINDOWS_1256,        CS_WINDOWS_1256},
        {C_WINDOWS_1257,        CS_WINDOWS_1257},
        {C_WINDOWS_1258,        CS_WINDOWS_1258},
        {C_KOI8_R,              CS_KOI8_R},
        {C_KOI8_T,              CS_KOI8_T},
        {C_KOI8_U,              CS_KOI8_U},
        {C_ISO_2022_JP,         CS_ISO_2022_JP},
        {C_ISO_2022_JP_2,       CS_ISO_2022_JP_2},
        {C_EUC_JP,              CS_EUC_JP},
        {C_EUC_JP,              CS_EUCJP},
        {C_SHIFT_JIS,           CS_SHIFT_JIS},
        {C_SHIFT_JIS,           CS_SHIFT__JIS},
        {C_SHIFT_JIS,           CS_SJIS},
        {C_ISO_2022_KR,         CS_ISO_2022_KR},
        {C_EUC_KR,              CS_EUC_KR},
        {C_ISO_2022_CN,         CS_ISO_2022_CN},
        {C_EUC_CN,              CS_EUC_CN},
        {C_GB2312,              CS_GB2312},
        {C_GBK,                 CS_GBK},
        {C_EUC_TW,              CS_EUC_TW},
        {C_BIG5,                CS_BIG5},
        {C_BIG5_HKSCS,          CS_BIG5_HKSCS},
        {C_TIS_620,             CS_TIS_620},
        {C_WINDOWS_874,         CS_WINDOWS_874},
        {C_GEORGIAN_PS,         CS_GEORGIAN_PS},
        {C_TCVN5712_1,          CS_TCVN5712_1},
};
 949
/*
 * Table mapping locale names to charsets.
 *
 *   locale      - locale name to compare against the current locale
 *   charset     - value returned by conv_get_current_charset()
 *   out_charset - value used by conv_get_outgoing_charset()
 *
 * Both lookup functions scan the table from the top and stop at the
 * first entry whose name is a case-insensitive prefix of the current
 * locale string, so longer, more specific names have to be listed
 * before the shorter names they start with (e.g. "ja_JP.eucJP" before
 * "ja_JP").  A current locale that is exactly two characters long also
 * matches the first entry without a '.' after its '_' whose first two
 * characters are the same.
 */
static const struct {
        gchar *const locale;
        CharSet charset;
        CharSet out_charset;
} locale_table[] = {
        {"ja_JP.eucJP"  , C_EUC_JP      , C_ISO_2022_JP},
        {"ja_JP.EUC-JP" , C_EUC_JP      , C_ISO_2022_JP},
        {"ja_JP.EUC"    , C_EUC_JP      , C_ISO_2022_JP},
        {"ja_JP.ujis"   , C_EUC_JP      , C_ISO_2022_JP},
        {"ja_JP.SJIS"   , C_SHIFT_JIS   , C_ISO_2022_JP},
        {"ja_JP.JIS"    , C_ISO_2022_JP , C_ISO_2022_JP},
        {"ja_JP"        , C_EUC_JP      , C_ISO_2022_JP},
        {"ko_KR.EUC-KR" , C_EUC_KR      , C_EUC_KR},
        {"ko_KR"        , C_EUC_KR      , C_EUC_KR},
        {"zh_CN.GB2312" , C_GB2312      , C_GB2312},
        {"zh_CN.GBK"    , C_GBK         , C_GB2312},
        {"zh_CN"        , C_GB2312      , C_GB2312},
        {"zh_HK"        , C_BIG5_HKSCS  , C_BIG5_HKSCS},
        {"zh_TW.eucTW"  , C_EUC_TW      , C_BIG5},
        {"zh_TW.EUC-TW" , C_EUC_TW      , C_BIG5},
        {"zh_TW.Big5"   , C_BIG5        , C_BIG5},
        {"zh_TW"        , C_BIG5        , C_BIG5},

        {"ru_RU.KOI8-R" , C_KOI8_R      , C_KOI8_R},
        {"ru_RU.KOI8R"  , C_KOI8_R      , C_KOI8_R},
        {"ru_RU.CP1251" , C_WINDOWS_1251, C_KOI8_R},
        {"ru_RU"        , C_ISO_8859_5  , C_KOI8_R},
        {"tg_TJ"        , C_KOI8_T      , C_KOI8_T},
        {"ru_UA"        , C_KOI8_U      , C_KOI8_U},
        {"uk_UA"        , C_KOI8_U      , C_KOI8_U},

        {"be_BY"        , C_WINDOWS_1251, C_WINDOWS_1251},
        {"bg_BG"        , C_WINDOWS_1251, C_WINDOWS_1251},

        {"yi_US"        , C_WINDOWS_1255, C_WINDOWS_1255},

        {"af_ZA"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"br_FR"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"ca_ES"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"da_DK"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"de_AT"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"de_BE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"de_CH"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"de_DE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"de_LU"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_AU"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_BW"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_CA"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_DK"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_GB"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_HK"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_IE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_NZ"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_PH"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_SG"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_US"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_ZA"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"en_ZW"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_AR"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_BO"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_CL"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_CO"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_CR"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_DO"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_EC"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_ES"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_GT"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_HN"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_MX"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_NI"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_PA"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_PE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_PR"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_PY"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_SV"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_US"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_UY"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"es_VE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"et_EE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"eu_ES"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"fi_FI"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"fo_FO"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"fr_BE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"fr_CA"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"fr_CH"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"fr_FR"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"fr_LU"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"ga_IE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"gl_ES"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"gv_GB"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"id_ID"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"is_IS"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"it_CH"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"it_IT"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"kl_GL"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"kw_GB"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"ms_MY"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"nl_BE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"nl_NL"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"nn_NO"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"no_NO"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"oc_FR"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"pt_BR"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"pt_PT"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"sq_AL"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"sv_FI"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"sv_SE"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"tl_PH"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"uz_UZ"        , C_ISO_8859_1  , C_ISO_8859_1},
        {"wa_BE"        , C_ISO_8859_1  , C_ISO_8859_1},

        {"bs_BA"        , C_ISO_8859_2  , C_ISO_8859_2},
        {"cs_CZ"        , C_ISO_8859_2  , C_ISO_8859_2},
        {"hr_HR"        , C_ISO_8859_2  , C_ISO_8859_2},
        {"hu_HU"        , C_ISO_8859_2  , C_ISO_8859_2},
        {"pl_PL"        , C_ISO_8859_2  , C_ISO_8859_2},
        {"ro_RO"        , C_ISO_8859_2  , C_ISO_8859_2},
        {"sk_SK"        , C_ISO_8859_2  , C_ISO_8859_2},
        {"sl_SI"        , C_ISO_8859_2  , C_ISO_8859_2},

        {"sr_YU@cyrillic"       , C_ISO_8859_5  , C_ISO_8859_5},
        {"sr_YU"                , C_ISO_8859_2  , C_ISO_8859_2},

        {"mt_MT"                , C_ISO_8859_3  , C_ISO_8859_3},

        {"lt_LT.iso88594"       , C_ISO_8859_4  , C_ISO_8859_4},
        {"lt_LT.ISO8859-4"      , C_ISO_8859_4  , C_ISO_8859_4},
        {"lt_LT.ISO_8859-4"     , C_ISO_8859_4  , C_ISO_8859_4},
        {"lt_LT"                , C_ISO_8859_13 , C_ISO_8859_13},

        {"mk_MK"        , C_ISO_8859_5  , C_ISO_8859_5},

        {"ar_AE"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_BH"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_DZ"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_EG"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_IQ"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_JO"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_KW"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_LB"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_LY"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_MA"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_OM"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_QA"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_SA"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_SD"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_SY"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_TN"        , C_ISO_8859_6  , C_ISO_8859_6},
        {"ar_YE"        , C_ISO_8859_6  , C_ISO_8859_6},

        {"el_GR"        , C_ISO_8859_7  , C_ISO_8859_7},
        {"he_IL"        , C_ISO_8859_8  , C_ISO_8859_8},
        {"iw_IL"        , C_ISO_8859_8  , C_ISO_8859_8},
        {"tr_TR"        , C_ISO_8859_9  , C_ISO_8859_9},

        {"lv_LV"        , C_ISO_8859_13 , C_ISO_8859_13},
        {"mi_NZ"        , C_ISO_8859_13 , C_ISO_8859_13},

        {"cy_GB"        , C_ISO_8859_14 , C_ISO_8859_14},

        {"ar_IN"        , C_UTF_8       , C_UTF_8},
        {"en_IN"        , C_UTF_8       , C_UTF_8},
        {"se_NO"        , C_UTF_8       , C_UTF_8},
        {"ta_IN"        , C_UTF_8       , C_UTF_8},
        {"te_IN"        , C_UTF_8       , C_UTF_8},
        {"ur_PK"        , C_UTF_8       , C_UTF_8},

        {"th_TH"        , C_TIS_620     , C_TIS_620},
        /* {"th_TH"     , C_WINDOWS_874}, */
        /* {"th_TH"     , C_ISO_8859_11}, */

        {"ka_GE"        , C_GEORGIAN_PS , C_GEORGIAN_PS},
        {"vi_VN.TCVN"   , C_TCVN5712_1  , C_TCVN5712_1},

        {"C"                    , C_US_ASCII    , C_US_ASCII},
        {"POSIX"                , C_US_ASCII    , C_US_ASCII},
        {"ANSI_X3.4-1968"       , C_US_ASCII    , C_US_ASCII},
};
1128
1129 static GHashTable *conv_get_charset_to_str_table(void)
1130 {
1131         static GHashTable *table;
1132         gint i;
1133
1134         if (table)
1135                 return table;
1136
1137         table = g_hash_table_new(NULL, g_direct_equal);
1138
1139         for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1140                 if (g_hash_table_lookup(table, GUINT_TO_POINTER(charsets[i].charset))
1141                     == NULL) {
1142                         g_hash_table_insert
1143                                 (table, GUINT_TO_POINTER(charsets[i].charset),
1144                                  charsets[i].name);
1145                 }
1146         }
1147
1148         return table;
1149 }
1150
1151 static gint str_case_equal(gconstpointer v, gconstpointer v2)
1152 {
1153         return strcasecmp((const gchar *)v, (const gchar *)v2) == 0;
1154 }
1155
1156 static guint str_case_hash(gconstpointer key)
1157 {
1158         const gchar *p = key;
1159         guint h = *p;
1160
1161         if (h) {
1162                 h = tolower(h);
1163                 for (p += 1; *p != '\0'; p++)
1164                         h = (h << 5) - h + tolower(*p);
1165         }
1166
1167         return h;
1168 }
1169
1170 static GHashTable *conv_get_charset_from_str_table(void)
1171 {
1172         static GHashTable *table;
1173         gint i;
1174
1175         if (table)
1176                 return table;
1177
1178         table = g_hash_table_new(str_case_hash, str_case_equal);
1179
1180         for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1181                 g_hash_table_insert(table, charsets[i].name,
1182                                     GUINT_TO_POINTER(charsets[i].charset));
1183         }
1184
1185         return table;
1186 }
1187
1188 const gchar *conv_get_charset_str(CharSet charset)
1189 {
1190         GHashTable *table;
1191
1192         table = conv_get_charset_to_str_table();
1193         return g_hash_table_lookup(table, GUINT_TO_POINTER(charset));
1194 }
1195
1196 CharSet conv_get_charset_from_str(const gchar *charset)
1197 {
1198         GHashTable *table;
1199
1200         if (!charset) return C_AUTO;
1201
1202         table = conv_get_charset_from_str_table();
1203         return GPOINTER_TO_UINT(g_hash_table_lookup(table, charset));
1204 }
1205
1206 CharSet conv_get_current_charset(void)
1207 {
1208         static CharSet cur_charset = -1;
1209         const gchar *cur_locale;
1210         const gchar *p;
1211         gint i;
1212
1213         if (cur_charset != -1)
1214                 return cur_charset;
1215
1216         cur_locale = conv_get_current_locale();
1217         if (!cur_locale) {
1218                 cur_charset = C_US_ASCII;
1219                 return cur_charset;
1220         }
1221
1222         if (strcasestr(cur_locale, "UTF-8")) {
1223                 cur_charset = C_UTF_8;
1224                 return cur_charset;
1225         }
1226
1227         if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1228                 cur_charset = C_ISO_8859_15;
1229                 return cur_charset;
1230         }
1231
1232         for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1233                 const gchar *p;
1234
1235                 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1236                    "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1237                 if (!strncasecmp(cur_locale, locale_table[i].locale,
1238                                  strlen(locale_table[i].locale))) {
1239                         cur_charset = locale_table[i].charset;
1240                         return cur_charset;
1241                 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1242                          !strchr(p + 1, '.')) {
1243                         if (strlen(cur_locale) == 2 &&
1244                             !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1245                                 cur_charset = locale_table[i].charset;
1246                                 return cur_charset;
1247                         }
1248                 }
1249         }
1250
1251         cur_charset = C_AUTO;
1252         return cur_charset;
1253 }
1254
1255 const gchar *conv_get_current_charset_str(void)
1256 {
1257         static const gchar *codeset = NULL;
1258
1259         if (!codeset)
1260                 codeset = conv_get_charset_str(conv_get_current_charset());
1261
1262         return codeset ? codeset : CS_US_ASCII;
1263 }
1264
1265 CharSet conv_get_outgoing_charset(void)
1266 {
1267         static CharSet out_charset = -1;
1268         const gchar *cur_locale;
1269         const gchar *p;
1270         gint i;
1271
1272         if (out_charset != -1)
1273                 return out_charset;
1274
1275         cur_locale = conv_get_current_locale();
1276         if (!cur_locale) {
1277                 out_charset = C_AUTO;
1278                 return out_charset;
1279         }
1280
1281         if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1282                 out_charset = C_ISO_8859_15;
1283                 return out_charset;
1284         }
1285
1286         for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1287                 const gchar *p;
1288
1289                 if (!strncasecmp(cur_locale, locale_table[i].locale,
1290                                  strlen(locale_table[i].locale))) {
1291                         out_charset = locale_table[i].out_charset;
1292                         break;
1293                 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1294                          !strchr(p + 1, '.')) {
1295                         if (strlen(cur_locale) == 2 &&
1296                             !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1297                                 out_charset = locale_table[i].out_charset;
1298                                 break;
1299                         }
1300                 }
1301         }
1302
1303 #if !HAVE_ICONV
1304         /* encoding conversion without iconv() is only supported
1305            on Japanese locale for now */
1306         if (out_charset == C_ISO_2022_JP)
1307                 return out_charset;
1308         else
1309                 return conv_get_current_charset();
1310 #endif
1311
1312         return out_charset;
1313 }
1314
1315 const gchar *conv_get_outgoing_charset_str(void)
1316 {
1317         CharSet out_charset;
1318         const gchar *str;
1319
1320         if (prefs_common.outgoing_charset) {
1321                 if (!isalpha(prefs_common.outgoing_charset[0])) {
1322                         g_free(prefs_common.outgoing_charset);
1323                         prefs_common.outgoing_charset = g_strdup(CS_AUTO);
1324                 } else if (strcmp(prefs_common.outgoing_charset, CS_AUTO) != 0)
1325                         return prefs_common.outgoing_charset;
1326         }
1327
1328         out_charset = conv_get_outgoing_charset();
1329         str = conv_get_charset_str(out_charset);
1330
1331         return str ? str : CS_US_ASCII;
1332 }
1333
1334 gboolean conv_is_multibyte_encoding(CharSet encoding)
1335 {
1336         switch (encoding) {
1337         case C_EUC_JP:
1338         case C_EUC_KR:
1339         case C_EUC_TW:
1340         case C_EUC_CN:
1341         case C_ISO_2022_JP:
1342         case C_ISO_2022_JP_2:
1343         case C_ISO_2022_KR:
1344         case C_ISO_2022_CN:
1345         case C_SHIFT_JIS:
1346         case C_GB2312:
1347         case C_BIG5:
1348         case C_UTF_8:
1349         case C_UTF_7:
1350                 return TRUE;
1351         default:
1352                 return FALSE;
1353         }
1354 }
1355
1356 const gchar *conv_get_current_locale(void)
1357 {
1358         gchar *cur_locale;
1359
1360         cur_locale = g_getenv("LC_ALL");
1361         if (!cur_locale) cur_locale = g_getenv("LC_CTYPE");
1362         if (!cur_locale) cur_locale = g_getenv("LANG");
1363         if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
1364
1365         debug_print("current locale: %s\n",
1366                     cur_locale ? cur_locale : "(none)");
1367
1368         return cur_locale;
1369 }
1370
1371 void conv_unmime_header_overwrite(gchar *str)
1372 {
1373         gchar *buf;
1374         gint buflen;
1375         CharSet cur_charset;
1376
1377         cur_charset = conv_get_current_charset();
1378
1379         if (cur_charset == C_EUC_JP) {
1380                 buflen = strlen(str) * 2 + 1;
1381                 Xalloca(buf, buflen, return);
1382                 conv_anytodisp(buf, buflen, str);
1383                 unmime_header(str, buf);
1384         } else {
1385                 buflen = strlen(str) + 1;
1386                 Xalloca(buf, buflen, return);
1387                 unmime_header(buf, str);
1388                 strncpy2(str, buf, buflen);
1389         }
1390 }
1391
1392 void conv_unmime_header(gchar *outbuf, gint outlen, const gchar *str,
1393                         const gchar *charset)
1394 {
1395         CharSet cur_charset;
1396
1397         cur_charset = conv_get_current_charset();
1398
1399         if (cur_charset == C_EUC_JP) {
1400                 gchar *buf;
1401                 gint buflen;
1402
1403                 buflen = strlen(str) * 2 + 1;
1404                 Xalloca(buf, buflen, return);
1405                 conv_anytodisp(buf, buflen, str);
1406                 unmime_header(outbuf, buf);
1407         } else
1408                 unmime_header(outbuf, str);
1409 }
1410
/*
 * Constants used by conv_encode_header().
 *
 * MAX_LINELEN       - column budget for one output line; the remaining
 *                     budget is tracked in the local variable `left'.
 * MAX_HARD_LINELEN  - larger limit applied while an ASCII word is
 *                     copied, after which a break is requested.
 * MIMESEP_BEGIN/END - strings placed before and after each encoded
 *                     block written to the output.
 */
#define MAX_LINELEN             76
#define MAX_HARD_LINELEN        996
#define MIMESEP_BEGIN           "=?"
#define MIMESEP_END             "?="

/* length of len bytes after Base64 encoding: 4 output characters for
   every 3 input bytes, with a partial final group rounded up to 4 */
#define B64LEN(len)     ((len) / 3 * 4 + ((len) % 3 ? 4 : 0))
1417
/*
 * Helper for conv_encode_header(): fold the output line when needed.
 *
 * The macro operates on the locals of the function it is expanded in:
 * dest/destp (output buffer and write position), len (output buffer
 * size), srcp (read position) and left (columns left on the line).
 *
 * 1. If fewer than MAX_LINELEN + 2 bytes remain in the output buffer,
 *    the output is terminated and the enclosing function returns.
 * 2. Otherwise, if cond is true and input remains, and something has
 *    already been written on a line that is not a fresh continuation
 *    line (left < MAX_LINELEN - 1): a whitespace character at the end
 *    of the output is dropped, or, when is_plain_text is set, a
 *    whitespace character at the read position is skipped.  If input
 *    still remains, a newline followed by one space is written and
 *    left is reset to MAX_LINELEN - 1.
 */
#define LBREAK_IF_REQUIRED(cond, is_plain_text)                         \
{                                                                       \
        if (len - (destp - dest) < MAX_LINELEN + 2) {                   \
                *destp = '\0';                                          \
                return;                                                 \
        }                                                               \
                                                                        \
        if ((cond) && *srcp) {                                          \
                if (destp > dest && left < MAX_LINELEN - 1) {           \
                        if (isspace(*(destp - 1)))                      \
                                destp--;                                \
                        else if (is_plain_text && isspace(*srcp))       \
                                srcp++;                                 \
                        if (*srcp) {                                    \
                                *destp++ = '\n';                        \
                                *destp++ = ' ';                         \
                                left = MAX_LINELEN - 1;                 \
                        }                                               \
                }                                                       \
        }                                                               \
}
1439
1440 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1441                         gint header_len)
1442 {
1443         const gchar *cur_encoding;
1444         const gchar *out_encoding;
1445         gint mimestr_len;
1446         gchar *mimesep_enc;
1447         gint left;
1448         const gchar *srcp = src;
1449         gchar *destp = dest;
1450         gboolean use_base64;
1451
1452         if (MB_CUR_MAX > 1) {
1453                 use_base64 = TRUE;
1454                 mimesep_enc = "?B?";
1455         } else {
1456                 use_base64 = FALSE;
1457                 mimesep_enc = "?Q?";
1458         }
1459
1460         cur_encoding = conv_get_current_charset_str();
1461         if (!strcmp(cur_encoding, CS_US_ASCII))
1462                 cur_encoding = CS_ISO_8859_1;
1463         out_encoding = conv_get_outgoing_charset_str();
1464         if (!strcmp(out_encoding, CS_US_ASCII))
1465                 out_encoding = CS_ISO_8859_1;
1466
1467         mimestr_len = strlen(MIMESEP_BEGIN) + strlen(out_encoding) +
1468                 strlen(mimesep_enc) + strlen(MIMESEP_END);
1469
1470         left = MAX_LINELEN - header_len;
1471
1472         while (*srcp) {
1473                 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1474
1475                 while (isspace(*srcp)) {
1476                         *destp++ = *srcp++;
1477                         left--;
1478                         LBREAK_IF_REQUIRED(left <= 0, TRUE);
1479                 }
1480
1481                 /* output as it is if the next word is ASCII string */
1482                 if (!is_next_nonascii(srcp)) {
1483                         gint word_len;
1484
1485                         word_len = get_next_word_len(srcp);
1486                         LBREAK_IF_REQUIRED(left < word_len, TRUE);
1487                         while (word_len > 0) {
1488                                 LBREAK_IF_REQUIRED(left + (MAX_HARD_LINELEN - MAX_LINELEN) <= 0, TRUE)
1489                                 *destp++ = *srcp++;
1490                                 left--;
1491                                 word_len--;
1492                         }
1493
1494                         continue;
1495                 }
1496
1497                 while (1) {
1498                         gint mb_len = 0;
1499                         gint cur_len = 0;
1500                         gchar *part_str;
1501                         gchar *out_str;
1502                         gchar *enc_str;
1503                         const gchar *p = srcp;
1504                         gint out_str_len;
1505                         gint out_enc_str_len;
1506                         gint mime_block_len;
1507                         gboolean cont = FALSE;
1508
1509                         while (*p != '\0') {
1510                                 if (isspace(*p) && !is_next_nonascii(p + 1))
1511                                         break;
1512
1513                                 if (MB_CUR_MAX > 1) {
1514                                         mb_len = mblen(p, MB_CUR_MAX);
1515                                         if (mb_len < 0) {
1516                                                 g_warning("conv_encode_header(): invalid multibyte character encountered\n");
1517                                                 mb_len = 1;
1518                                         }
1519                                 } else
1520                                         mb_len = 1;
1521
1522                                 Xstrndup_a(part_str, srcp, cur_len + mb_len, );
1523                                 out_str = conv_codeset_strdup
1524                                         (part_str, cur_encoding, out_encoding);
1525                                 if (!out_str) {
1526                                         g_warning("conv_encode_header(): code conversion failed\n");
1527                                         conv_unreadable_8bit(part_str);
1528                                         out_str = g_strdup(part_str);
1529                                 }
1530                                 out_str_len = strlen(out_str);
1531
1532                                 if (use_base64)
1533                                         out_enc_str_len = B64LEN(out_str_len);
1534                                 else
1535                                         out_enc_str_len =
1536                                                 qp_get_q_encoding_len(out_str);
1537
1538                                 g_free(out_str);
1539
1540                                 if (mimestr_len + out_enc_str_len <= left) {
1541                                         cur_len += mb_len;
1542                                         p += mb_len;
1543                                 } else if (cur_len == 0) {
1544                                         LBREAK_IF_REQUIRED(1, FALSE);
1545                                         continue;
1546                                 } else {
1547                                         cont = TRUE;
1548                                         break;
1549                                 }
1550                         }
1551
1552                         if (cur_len > 0) {
1553                                 Xstrndup_a(part_str, srcp, cur_len, );
1554                                 out_str = conv_codeset_strdup
1555                                         (part_str, cur_encoding, out_encoding);
1556                                 if (!out_str) {
1557                                         g_warning("conv_encode_header(): code conversion failed\n");
1558                                         conv_unreadable_8bit(part_str);
1559                                         out_str = g_strdup(part_str);
1560                                 }
1561                                 out_str_len = strlen(out_str);
1562
1563                                 if (use_base64)
1564                                         out_enc_str_len = B64LEN(out_str_len);
1565                                 else
1566                                         out_enc_str_len =
1567                                                 qp_get_q_encoding_len(out_str);
1568
1569                                 Xalloca(enc_str, out_enc_str_len + 1, );
1570                                 if (use_base64)
1571                                         base64_encode(enc_str, out_str, out_str_len);
1572                                 else
1573                                         qp_q_encode(enc_str, out_str);
1574
1575                                 g_free(out_str);
1576
1577                                 /* output MIME-encoded string block */
1578                                 mime_block_len = mimestr_len + strlen(enc_str);
1579                                 g_snprintf(destp, mime_block_len + 1,
1580                                            MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
1581                                            out_encoding, mimesep_enc, enc_str);
1582                                 destp += mime_block_len;
1583                                 srcp += cur_len;
1584
1585                                 left -= mime_block_len;
1586                         }
1587
1588                         LBREAK_IF_REQUIRED(cont, FALSE);
1589
1590                         if (cur_len == 0)
1591                                 break;
1592                 }
1593         }
1594
1595         *destp = '\0';
1596 }
1597
1598 #undef LBREAK_IF_REQUIRED