src/codeconv.c

   1 /*
   2  * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
   3  * Copyright (C) 1999-2003 Hiroyuki Yamamoto
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 2 of the License, or
   8  * (at your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License
  16  * along with this program; if not, write to the Free Software
  17  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  18  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 #  include "config.h"
  22 #endif
  23
  24 #include <glib.h>
  25 #include <string.h>
  26 #include <ctype.h>
  27 #include <stdlib.h>
  28 #include <errno.h>
  29
  30 #if HAVE_LOCALE_H
  31 #  include <locale.h>
  32 #endif
  33
  34 #if HAVE_ICONV
  35 #  include <iconv.h>
  36 #endif
  37
  38 #include "intl.h"
  39 #include "codeconv.h"
  40 #include "unmime.h"
  41 #include "base64.h"
  42 #include "quoted-printable.h"
  43 #include "utils.h"
  44 #include "prefs_common.h"
  45
/*
 * Character set currently selected while reading or writing a
 * JIS (ISO-2022-JP style) byte stream in this file.
 */
typedef enum
{
        JIS_ASCII,      /* single-byte text; K_OUT() selects it with ESC ( B */
        JIS_KANJI,      /* double-byte kanji; K_IN() selects it with ESC $ B */
        JIS_HWKANA,     /* half-width kana; HW_IN() selects it with ESC ( I */
        JIS_AUXKANJI    /* auxiliary kanji; AUX_IN() selects it with ESC $ ( D */
} JISState;
  53
/* Character written in place of a byte that cannot be converted or shown. */
#define SUBST_CHAR      '_'
/* Escape byte that starts a JIS designation sequence. */
#define ESC             '\033'

/*
 * Byte-class predicates.  Each one masks its argument with 0xff before
 * comparing it against a range.  The argument is evaluated more than once,
 * so it must not have side effects.
 */

/* byte in 0xa1..0xfe: either byte of a double-byte EUC-JP character */
#define iseuckanji(c) \
        (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
/* 0x8e: EUC-JP prefix byte for half-width kana */
#define iseuchwkana1(c) \
        (((c) & 0xff) == 0x8e)
/* byte in 0xa1..0xdf: the kana byte that follows 0x8e */
#define iseuchwkana2(c) \
        (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
/* 0x8f: EUC-JP prefix byte for auxiliary kanji */
#define iseucaux(c) \
        (((c) & 0xff) == 0x8f)
/* byte in 0x81..0x9f or 0xe0..0xfc: Shift_JIS lead byte */
#define issjiskanji1(c) \
        ((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
         (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xfc))
/* byte in 0x40..0x7e or 0x80..0xfc: Shift_JIS trail byte */
#define issjiskanji2(c) \
        ((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) || \
         (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
/* byte in 0xa1..0xdf: Shift_JIS single-byte half-width kana */
#define issjishwkana(c) \
        (((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
  73
  74 #define K_IN()                          \
  75         if (state != JIS_KANJI) {       \
  76                 *out++ = ESC;           \
  77                 *out++ = '$';           \
  78                 *out++ = 'B';           \
  79                 state = JIS_KANJI;      \
  80         }
  81
  82 #define K_OUT()                         \
  83         if (state != JIS_ASCII) {       \
  84                 *out++ = ESC;           \
  85                 *out++ = '(';           \
  86                 *out++ = 'B';           \
  87                 state = JIS_ASCII;      \
  88         }
  89
  90 #define HW_IN()                         \
  91         if (state != JIS_HWKANA) {      \
  92                 *out++ = ESC;           \
  93                 *out++ = '(';           \
  94                 *out++ = 'I';           \
  95                 state = JIS_HWKANA;     \
  96         }
  97
  98 #define AUX_IN()                        \
  99         if (state != JIS_AUXKANJI) {    \
 100                 *out++ = ESC;           \
 101                 *out++ = '$';           \
 102                 *out++ = '(';           \
 103                 *out++ = 'D';           \
 104                 state = JIS_AUXKANJI;   \
 105         }
 106
 107 void conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
 108 {
 109         const guchar *in = inbuf;
 110         guchar *out = outbuf;
 111         JISState state = JIS_ASCII;
 112
 113         while (*in != '\0') {
 114                 if (*in == ESC) {
 115                         in++;
 116                         if (*in == '$') {
 117                                 if (*(in + 1) == '@' || *(in + 1) == 'B') {
 118                                         state = JIS_KANJI;
 119                                         in += 2;
 120                                 } else if (*(in + 1) == '(' &&
 121                                            *(in + 2) == 'D') {
 122                                         state = JIS_AUXKANJI;
 123                                         in += 3;
 124                                 } else {
 125                                         /* unknown escape sequence */
 126                                         state = JIS_ASCII;
 127                                 }
 128                         } else if (*in == '(') {
 129                                 if (*(in + 1) == 'B' || *(in + 1) == 'J') {
 130                                         state = JIS_ASCII;
 131                                         in += 2;
 132                                 } else if (*(in + 1) == 'I') {
 133                                         state = JIS_HWKANA;
 134                                         in += 2;
 135                                 } else {
 136                                         /* unknown escape sequence */
 137                                         state = JIS_ASCII;
 138                                 }
 139                         } else {
 140                                 /* unknown escape sequence */
 141                                 state = JIS_ASCII;
 142                         }
 143                 } else if (*in == 0x0e) {
 144                         state = JIS_HWKANA;
 145                         in++;
 146                 } else if (*in == 0x0f) {
 147                         state = JIS_ASCII;
 148                         in++;
 149                 } else {
 150                         switch (state) {
 151                         case JIS_ASCII:
 152                                 *out++ = *in++;
 153                                 break;
 154                         case JIS_KANJI:
 155                                 *out++ = *in++ | 0x80;
 156                                 if (*in == '\0') break;
 157                                 *out++ = *in++ | 0x80;
 158                                 break;
 159                         case JIS_HWKANA:
 160                                 *out++ = 0x8e;
 161                                 *out++ = *in++ | 0x80;
 162                                 break;
 163                         case JIS_AUXKANJI:
 164                                 *out++ = 0x8f;
 165                                 *out++ = *in++ | 0x80;
 166                                 if (*in == '\0') break;
 167                                 *out++ = *in++ | 0x80;
 168                                 break;
 169                         }
 170                 }
 171         }
 172
 173         *out = '\0';
 174 }
 175
 176 void conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
 177 {
 178         const guchar *in = inbuf;
 179         guchar *out = outbuf;
 180         JISState state = JIS_ASCII;
 181
 182         while (*in != '\0') {
 183                 if (isascii(*in)) {
 184                         K_OUT();
 185                         *out++ = *in++;
 186                 } else if (iseuckanji(*in)) {
 187                         if (iseuckanji(*(in + 1))) {
 188                                 K_IN();
 189                                 *out++ = *in++ & 0x7f;
 190                                 *out++ = *in++ & 0x7f;
 191                         } else {
 192                                 K_OUT();
 193                                 *out++ = SUBST_CHAR;
 194                                 in++;
 195                                 if (*in != '\0' && !isascii(*in)) {
 196                                         *out++ = SUBST_CHAR;
 197                                         in++;
 198                                 }
 199                         }
 200                 } else if (iseuchwkana1(*in)) {
 201                         in++;
 202                         if (iseuchwkana2(*in)) {
 203                                 HW_IN();
 204                                 *out++ = *in++ & 0x7f;
 205                         } else {
 206                                 K_OUT();
 207                                 if (*in != '\0' && !isascii(*in)) {
 208                                         *out++ = SUBST_CHAR;
 209                                         in++;
 210                                 }
 211                         }
 212                 } else if (iseucaux(*in)) {
 213                         in++;
 214                         if (iseuckanji(*in) && iseuckanji(*(in + 1))) {
 215                                 AUX_IN();
 216                                 *out++ = *in++ & 0x7f;
 217                                 *out++ = *in++ & 0x7f;
 218                         } else {
 219                                 K_OUT();
 220                                 if (*in != '\0' && !isascii(*in)) {
 221                                         *out++ = SUBST_CHAR;
 222                                         in++;
 223                                         if (*in != '\0' && !isascii(*in)) {
 224                                                 *out++ = SUBST_CHAR;
 225                                                 in++;
 226                                         }
 227                                 }
 228                         }
 229                 } else {
 230                         K_OUT();
 231                         *out++ = SUBST_CHAR;
 232                         in++;
 233                 }
 234         }
 235
 236         K_OUT();
 237         *out = '\0';
 238 }
 239
 240 void conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
 241 {
 242         const guchar *in = inbuf;
 243         guchar *out = outbuf;
 244
 245         while (*in != '\0') {
 246                 if (isascii(*in)) {
 247                         *out++ = *in++;
 248                 } else if (issjiskanji1(*in)) {
 249                         if (issjiskanji2(*(in + 1))) {
 250                                 guchar out1 = *in;
 251                                 guchar out2 = *(in + 1);
 252                                 guchar row;
 253
 254                                 row = out1 < 0xa0 ? 0x70 : 0xb0;
 255                                 if (out2 < 0x9f) {
 256                                         out1 = (out1 - row) * 2 - 1;
 257                                         out2 -= out2 > 0x7f ? 0x20 : 0x1f;
 258                                 } else {
 259                                         out1 = (out1 - row) * 2;
 260                                         out2 -= 0x7e;
 261                                 }
 262
 263                                 *out++ = out1 | 0x80;
 264                                 *out++ = out2 | 0x80;
 265                                 in += 2;
 266                         } else {
 267                                 *out++ = SUBST_CHAR;
 268                                 in++;
 269                                 if (*in != '\0' && !isascii(*in)) {
 270                                         *out++ = SUBST_CHAR;
 271                                         in++;
 272                                 }
 273                         }
 274                 } else if (issjishwkana(*in)) {
 275                         *out++ = 0x8e;
 276                         *out++ = *in++;
 277                 } else {
 278                         *out++ = SUBST_CHAR;
 279                         in++;
 280                 }
 281         }
 282
 283         *out = '\0';
 284 }
 285
 286 void conv_anytoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
 287 {
 288         switch (conv_guess_ja_encoding(inbuf)) {
 289         case C_ISO_2022_JP:
 290                 conv_jistoeuc(outbuf, outlen, inbuf);
 291                 break;
 292         case C_SHIFT_JIS:
 293                 conv_sjistoeuc(outbuf, outlen, inbuf);
 294                 break;
 295         default:
 296                 strncpy2(outbuf, inbuf, outlen);
 297                 break;
 298         }
 299 }
 300
 301 void conv_anytojis(gchar *outbuf, gint outlen, const gchar *inbuf)
 302 {
 303         switch (conv_guess_ja_encoding(inbuf)) {
 304         case C_EUC_JP:
 305                 conv_euctojis(outbuf, outlen, inbuf);
 306                 break;
 307         default:
 308                 strncpy2(outbuf, inbuf, outlen);
 309                 break;
 310         }
 311 }
 312
/*
 * Lookup table used by isprintableeuckanji() for EUC-JP lead bytes
 * 0xa2..0xa8 (one row per lead byte, in that order).  The column index is
 * the second byte minus 0xa0; a non-zero entry makes
 * isprintableeuckanji() return TRUE for that two-byte code.
 */
static gchar valid_eucjp_tbl[][96] = {
        /* 0xa2a0 - 0xa2ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 1, 1, 1, 1, 1, 1,
          1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 1, 1, 1, 1, 1, 1,
          1, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0,
          0, 0, 1, 1, 1, 1, 1, 1,  1, 1, 0, 0, 0, 0, 1, 0 },

        /* 0xa3a0 - 0xa3ff */
        { 0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 0, 0, 0, 0, 0, 0,
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0,
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 0, 0, 0, 0, 0 },

        /* 0xa4a0 - 0xa4ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },

        /* 0xa5a0 - 0xa5ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 0,  0, 0, 0, 0, 0, 0, 0, 0 },

        /* 0xa6a0 - 0xa6ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 0, 0, 0, 0, 0, 0, 0,
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },

        /* 0xa7a0 - 0xa7ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 },

        /* 0xa8a0 - 0xa8ff */
        { 0, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
          1, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0 }
};
 370
 371 static gboolean isprintableeuckanji(guchar c1, guchar c2)
 372 {
 373         if (c1 <= 0xa0 || c1 >= 0xf5)
 374                 return FALSE;
 375         if (c2 <= 0xa0 || c2 == 0xff)
 376                 return FALSE;
 377
 378         if (c1 >= 0xa9 && c1 <= 0xaf)
 379                 return FALSE;
 380
 381         if (c1 >= 0xa2 && c1 <= 0xa8)
 382                 return (gboolean)valid_eucjp_tbl[c1 - 0xa2][c2 - 0xa0];
 383
 384         if (c1 == 0xcf) {
 385                 if (c2 >= 0xd4 && c2 <= 0xff)
 386                         return FALSE;
 387         } else if (c1 == 0xf4) {
 388                 if (c2 >= 0xa7 && c2 <= 0xff)
 389                         return FALSE;
 390         }
 391
 392         return TRUE;
 393 }
 394
 395 void conv_unreadable_eucjp(gchar *str)
 396 {
 397         register guchar *p = str;
 398
 399         while (*p != '\0') {
 400                 if (isascii(*p)) {
 401                         /* convert CR+LF -> LF */
 402                         if (*p == '\r' && *(p + 1) == '\n')
 403                                 memmove(p, p + 1, strlen(p));
 404                         /* printable 7 bit code */
 405                         p++;
 406                 } else if (iseuckanji(*p)) {
 407                         if (isprintableeuckanji(*p, *(p + 1))) {
 408                                 /* printable euc-jp code */
 409                                 p += 2;
 410                         } else {
 411                                 /* substitute unprintable code */
 412                                 *p++ = SUBST_CHAR;
 413                                 if (*p != '\0') {
 414                                         if (isascii(*p))
 415                                                 p++;
 416                                         else
 417                                                 *p++ = SUBST_CHAR;
 418                                 }
 419                         }
 420                 } else if (iseuchwkana1(*p)) {
 421                         if (iseuchwkana2(*(p + 1)))
 422                                 /* euc-jp hankaku kana */
 423                                 p += 2;
 424                         else
 425                                 *p++ = SUBST_CHAR;
 426                 } else if (iseucaux(*p)) {
 427                         if (iseuckanji(*(p + 1)) && iseuckanji(*(p + 2))) {
 428                                 /* auxiliary kanji */
 429                                 p += 3;
 430                         } else
 431                                 *p++ = SUBST_CHAR;
 432                 } else
 433                         /* substitute unprintable 1 byte code */
 434                         *p++ = SUBST_CHAR;
 435         }
 436 }
 437
 438 void conv_unreadable_8bit(gchar *str)
 439 {
 440         register guchar *p = str;
 441
 442         while (*p != '\0') {
 443                 /* convert CR+LF -> LF */
 444                 if (*p == '\r' && *(p + 1) == '\n')
 445                         memmove(p, p + 1, strlen(p));
 446                 else if (!isascii(*p)) *p = SUBST_CHAR;
 447                 p++;
 448         }
 449 }
 450
 451 void conv_unreadable_latin(gchar *str)
 452 {
 453         register guchar *p = str;
 454
 455         while (*p != '\0') {
 456                 /* convert CR+LF -> LF */
 457                 if (*p == '\r' && *(p + 1) == '\n')
 458                         memmove(p, p + 1, strlen(p));
 459                 else if ((*p & 0xff) >= 0x7f && (*p & 0xff) <= 0x9f)
 460                         *p = SUBST_CHAR;
 461                 p++;
 462         }
 463 }
 464
 465 #define NCV     '\0'
 466
 467 void conv_mb_alnum(gchar *str)
 468 {
 469         static guchar char_tbl[] = {
 470                 /* 0xa0 - 0xaf */
 471                 NCV, ' ', NCV, NCV, ',', '.', NCV, ':',
 472                 ';', '?', '!', NCV, NCV, NCV, NCV, NCV,
 473                 /* 0xb0 - 0xbf */
 474                 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
 475                 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
 476                 /* 0xc0 - 0xcf */
 477                 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV,
 478                 NCV, NCV, '(', ')', NCV, NCV, '[', ']',
 479                 /* 0xd0 - 0xdf */
 480                 '{', '}', NCV, NCV, NCV, NCV, NCV, NCV,
 481                 NCV, NCV, NCV, NCV, '+', '-', NCV, NCV,
 482                 /* 0xe0 - 0xef */
 483                 NCV, '=', NCV, '<', '>', NCV, NCV, NCV,
 484                 NCV, NCV, NCV, NCV, NCV, NCV, NCV, NCV
 485         };
 486
 487         register guchar *p = str;
 488         register gint len;
 489
 490         len = strlen(str);
 491
 492         while (len > 1) {
 493                 if (*p == 0xa3) {
 494                         register guchar ch = *(p + 1);
 495
 496                         if (ch >= 0xb0 && ch <= 0xfa) {
 497                                 /* [a-zA-Z] */
 498                                 *p = ch & 0x7f;
 499                                 p++;
 500                                 len--;
 501                                 memmove(p, p + 1, len);
 502                                 len--;
 503                         } else  {
 504                                 p += 2;
 505                                 len -= 2;
 506                         }
 507                 } else if (*p == 0xa1) {
 508                         register guchar ch = *(p + 1);
 509
 510                         if (ch >= 0xa0 && ch <= 0xef &&
 511                             NCV != char_tbl[ch - 0xa0]) {
 512                                 *p = char_tbl[ch - 0xa0];
 513                                 p++;
 514                                 len--;
 515                                 memmove(p, p + 1, len);
 516                                 len--;
 517                         } else {
 518                                 p += 2;
 519                                 len -= 2;
 520                         }
 521                 } else if (iseuckanji(*p)) {
 522                         p += 2;
 523                         len -= 2;
 524                 } else {
 525                         p++;
 526                         len--;
 527                 }
 528         }
 529 }
 530
 531 CharSet conv_guess_ja_encoding(const gchar *str)
 532 {
 533         const guchar *p = str;
 534         CharSet guessed = C_US_ASCII;
 535
 536         while (*p != '\0') {
 537                 if (*p == ESC && (*(p + 1) == '$' || *(p + 1) == '(')) {
 538                         if (guessed == C_US_ASCII)
 539                                 return C_ISO_2022_JP;
 540                         p += 2;
 541                 } else if (isascii(*p)) {
 542                         p++;
 543                 } else if (iseuckanji(*p) && iseuckanji(*(p + 1))) {
 544                         if (*p >= 0xfd && *p <= 0xfe)
 545                                 return C_EUC_JP;
 546                         else if (guessed == C_SHIFT_JIS) {
 547                                 if ((issjiskanji1(*p) &&
 548                                      issjiskanji2(*(p + 1))) ||
 549                                     issjishwkana(*p))
 550                                         guessed = C_SHIFT_JIS;
 551                                 else
 552                                         guessed = C_EUC_JP;
 553                         } else
 554                                 guessed = C_EUC_JP;
 555                         p += 2;
 556                 } else if (issjiskanji1(*p) && issjiskanji2(*(p + 1))) {
 557                         if (iseuchwkana1(*p) && iseuchwkana2(*(p + 1)))
 558                                 guessed = C_SHIFT_JIS;
 559                         else
 560                                 return C_SHIFT_JIS;
 561                         p += 2;
 562                 } else if (issjishwkana(*p)) {
 563                         guessed = C_SHIFT_JIS;
 564                         p++;
 565                 } else {
 566                         p++;
 567                 }
 568         }
 569
 570         return guessed;
 571 }
 572
/*
 * Convert a JIS string for display: conv_jistoeuc() fills outbuf, then
 * conv_unreadable_eucjp() cleans up the result in place.
 */
void conv_jistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        conv_jistoeuc(outbuf, outlen, inbuf);
        conv_unreadable_eucjp(outbuf);
}
 578
/*
 * Convert a Shift_JIS string for display: conv_sjistoeuc() fills outbuf,
 * then conv_unreadable_eucjp() cleans up the result in place.
 */
void conv_sjistodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        conv_sjistoeuc(outbuf, outlen, inbuf);
        conv_unreadable_eucjp(outbuf);
}
 584
/*
 * Prepare an EUC-JP string for display: copy it into outbuf with
 * strncpy2(), then clean up the copy with conv_unreadable_eucjp().
 */
void conv_euctodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        strncpy2(outbuf, inbuf, outlen);
        conv_unreadable_eucjp(outbuf);
}
 590
/*
 * Convert a string of unknown Japanese encoding for display:
 * conv_anytoeuc() fills outbuf, then conv_unreadable_eucjp() cleans up the
 * result in place.
 */
void conv_anytodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        conv_anytoeuc(outbuf, outlen, inbuf);
        conv_unreadable_eucjp(outbuf);
}
 596
/*
 * Prepare a string for display as 7-bit text: copy it into outbuf with
 * strncpy2(), then let conv_unreadable_8bit() replace non-ASCII bytes.
 */
void conv_ustodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        strncpy2(outbuf, inbuf, outlen);
        conv_unreadable_8bit(outbuf);
}
 602
/*
 * Prepare a Latin string for display: copy it into outbuf with
 * strncpy2(), then let conv_unreadable_latin() replace bytes in
 * 0x7f..0x9f.
 */
void conv_latintodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        strncpy2(outbuf, inbuf, outlen);
        conv_unreadable_latin(outbuf);
}
 608
/*
 * Identity "conversion": copy inbuf into outbuf with strncpy2().
 * Other functions in this file compare a CodeConvFunc against this
 * function to detect that no built-in converter applies.
 */
void conv_noconv(gchar *outbuf, gint outlen, const gchar *inbuf)
{
        strncpy2(outbuf, inbuf, outlen);
}
 613
 614 void conv_localetodisp(gchar *outbuf, gint outlen, const gchar *inbuf)
 615 {
 616         strncpy2(outbuf, inbuf, outlen);
 617
 618         switch (conv_get_current_charset()) {
 619         case C_US_ASCII:
 620         case C_ISO_8859_1:
 621         case C_ISO_8859_2:
 622         case C_ISO_8859_3:
 623         case C_ISO_8859_4:
 624         case C_ISO_8859_5:
 625         case C_ISO_8859_6:
 626         case C_ISO_8859_7:
 627         case C_ISO_8859_8:
 628         case C_ISO_8859_9:
 629         case C_ISO_8859_10:
 630         case C_ISO_8859_11:
 631         case C_ISO_8859_13:
 632         case C_ISO_8859_14:
 633         case C_ISO_8859_15:
 634                 conv_unreadable_latin(outbuf);
 635                 break;
 636         case C_EUC_JP:
 637                 conv_unreadable_eucjp(outbuf);
 638                 break;
 639         default:
 640                 break;
 641         }
 642 }
 643
 644 CodeConverter *conv_code_converter_new(const gchar *charset)
 645 {
 646         CodeConverter *conv;
 647
 648         conv = g_new0(CodeConverter, 1);
 649         conv->code_conv_func = conv_get_code_conv_func(charset, NULL);
 650         conv->charset_str = g_strdup(charset);
 651         conv->charset = conv_get_charset_from_str(charset);
 652
 653         return conv;
 654 }
 655
 656 void conv_code_converter_destroy(CodeConverter *conv)
 657 {
 658         g_free(conv->charset_str);
 659         g_free(conv);
 660 }
 661
 662 gint conv_convert(CodeConverter *conv, gchar *outbuf, gint outlen,
 663                   const gchar *inbuf)
 664 {
 665 #if HAVE_ICONV
 666         if (conv->code_conv_func != conv_noconv)
 667                 conv->code_conv_func(outbuf, outlen, inbuf);
 668         else {
 669                 gchar *str;
 670
 671                 str = conv_iconv_strdup(inbuf, conv->charset_str, NULL);
 672                 if (!str)
 673                         return -1;
 674                 else {
 675                         strncpy2(outbuf, str, outlen);
 676                         g_free(str);
 677                 }
 678         }
 679 #else /* !HAVE_ICONV */
 680         conv->code_conv_func(outbuf, outlen, inbuf);
 681 #endif
 682
 683         return 0;
 684 }
 685
 686 gchar *conv_codeset_strdup(const gchar *inbuf,
 687                            const gchar *src_code, const gchar *dest_code)
 688 {
 689         gchar *buf;
 690         size_t len;
 691         CodeConvFunc conv_func;
 692
 693         conv_func = conv_get_code_conv_func(src_code, dest_code);
 694         if (conv_func != conv_noconv) {
 695                 len = (strlen(inbuf) + 1) * 3;
 696                 buf = g_malloc(len);
 697                 if (!buf) return NULL;
 698
 699                 conv_func(buf, len, inbuf);
 700                 return g_realloc(buf, strlen(buf) + 1);
 701         }
 702
 703 #if HAVE_ICONV
 704         return conv_iconv_strdup(inbuf, src_code, dest_code);
 705 #else
 706         return g_strdup(inbuf);
 707 #endif /* HAVE_ICONV */
 708 }
 709
/*
 * Choose the built-in conversion function for a source/destination
 * charset pair.
 *
 * src_charset_str:  source charset name, or NULL to use the current
 *                   charset (conv_get_current_charset()).
 * dest_charset_str: destination charset name; passed to
 *                   conv_get_charset_from_str().
 *
 * Returns conv_noconv when no built-in converter applies.
 */
CodeConvFunc conv_get_code_conv_func(const gchar *src_charset_str,
                                     const gchar *dest_charset_str)
{
        CodeConvFunc code_conv = conv_noconv;
        CharSet src_charset;
        CharSet dest_charset;

        if (!src_charset_str)
                src_charset = conv_get_current_charset();
        else
                src_charset = conv_get_charset_from_str(src_charset_str);

        /* auto detection mode: neither charset was given, so guess the
           encoding only when the current charset is EUC-JP or Shift_JIS */
        if (!src_charset_str && !dest_charset_str) {
                if (src_charset == C_EUC_JP || src_charset == C_SHIFT_JIS)
                        return conv_anytodisp;
                else
                        return conv_noconv;
        }

        dest_charset = conv_get_charset_from_str(dest_charset_str);

        /* a US-ASCII destination always gets the 7-bit cleanup; a UTF-8
           destination (explicit, or C_AUTO with a UTF-8 current charset)
           is never handled by the built-in converters */
        if (dest_charset == C_US_ASCII)
                return conv_ustodisp;
        else if (dest_charset == C_UTF_8 ||
                 (dest_charset == C_AUTO &&
                  conv_get_current_charset() == C_UTF_8))
                return conv_noconv;

        /* in each case below, C_AUTO selects the "...todisp" variant that
           also cleans the result up for display */
        switch (src_charset) {
        case C_ISO_2022_JP:
        case C_ISO_2022_JP_2:
                if (dest_charset == C_AUTO)
                        code_conv = conv_jistodisp;
                else if (dest_charset == C_EUC_JP)
                        code_conv = conv_jistoeuc;
                break;
        case C_US_ASCII:
                if (dest_charset == C_AUTO)
                        code_conv = conv_ustodisp;
                break;
        case C_ISO_8859_1:
        case C_ISO_8859_2:
        case C_ISO_8859_3:
        case C_ISO_8859_4:
        case C_ISO_8859_5:
        case C_ISO_8859_6:
        case C_ISO_8859_7:
        case C_ISO_8859_8:
        case C_ISO_8859_9:
        case C_ISO_8859_10:
        case C_ISO_8859_11:
        case C_ISO_8859_13:
        case C_ISO_8859_14:
        case C_ISO_8859_15:
                if (dest_charset == C_AUTO)
                        code_conv = conv_latintodisp;
                break;
        case C_SHIFT_JIS:
                if (dest_charset == C_AUTO)
                        code_conv = conv_sjistodisp;
                else if (dest_charset == C_EUC_JP)
                        code_conv = conv_sjistoeuc;
                break;
        case C_EUC_JP:
                if (dest_charset == C_AUTO)
                        code_conv = conv_euctodisp;
                else if (dest_charset == C_ISO_2022_JP ||
                         dest_charset == C_ISO_2022_JP_2)
                        code_conv = conv_euctojis;
                break;
        default:
                /* no built-in converter for this source charset */
                break;
        }

        return code_conv;
}
 787
 788 #if HAVE_ICONV
 789 gchar *conv_iconv_strdup(const gchar *inbuf,
 790                          const gchar *src_code, const gchar *dest_code)
 791 {
 792         iconv_t cd;
 793         const gchar *inbuf_p;
 794         gchar *outbuf;
 795         gchar *outbuf_p;
 796         gint in_size;
 797         gint in_left;
 798         gint out_size;
 799         gint out_left;
 800         gint n_conv;
 801
 802         if (!src_code)
 803                 src_code = conv_get_outgoing_charset_str();
 804         if (!dest_code)
 805                 dest_code = conv_get_current_charset_str();
 806
 807         /* don't convert if current codeset is US-ASCII */
 808         if (!strcasecmp(dest_code, CS_US_ASCII))
 809                 return g_strdup(inbuf);
 810
 811         /* don't convert if src and dest codeset are identical */
 812         if (!strcasecmp(src_code, dest_code))
 813                 return g_strdup(inbuf);
 814
 815         cd = iconv_open(dest_code, src_code);
 816         if (cd == (iconv_t)-1)
 817                 return NULL;
 818
 819         inbuf_p = inbuf;
 820         in_size = strlen(inbuf) + 1;
 821         in_left = in_size;
 822         out_size = in_size * 2;
 823         outbuf = g_malloc(out_size);
 824         outbuf_p = outbuf;
 825         out_left = out_size;
 826
 827         while ((n_conv = iconv(cd, (ICONV_CONST gchar **)&inbuf_p, &in_left,
 828                                &outbuf_p, &out_left)) < 0) {
 829                 if (EILSEQ == errno) {
 830                         inbuf_p++;
 831                         in_left--;
 832                         *outbuf_p++ = SUBST_CHAR;
 833                         out_left--;
 834                 } else if (EINVAL == errno) {
 835                         *outbuf_p = '\0';
 836                         break;
 837                 } else if (E2BIG == errno) {
 838                         out_size *= 2;
 839                         outbuf = g_realloc(outbuf, out_size);
 840                         inbuf_p = inbuf;
 841                         in_left = in_size;
 842                         outbuf_p = outbuf;
 843                         out_left = out_size;
 844                 } else {
 845                         g_warning("conv_iconv_strdup(): %s\n",
 846                                   g_strerror(errno));
 847                         *outbuf_p = '\0';
 848                         break;
 849                 }
 850         }
 851
 852         iconv(cd, NULL, NULL, &outbuf_p, &out_left);
 853         outbuf = g_realloc(outbuf, strlen(outbuf) + 1);
 854
 855         iconv_close(cd);
 856
 857         return outbuf;
 858 }
 859 #endif /* HAVE_ICONV */
 860
     /* Pairs of charset id and charset name.  One id may appear with
        several names: conv_get_charset_from_str_table() registers every
        name, while conv_get_charset_to_str_table() keeps only the first
        name listed for an id, so the first spelling is the one reported
        for that id. */
 861 static const struct {
 862         CharSet charset;        /* charset id */
 863         gchar *const name;      /* name string for that id */
 864 } charsets[] = {
 865         {C_US_ASCII,            CS_US_ASCII},
 866         {C_US_ASCII,            CS_ANSI_X3_4_1968},
 867         {C_UTF_8,               CS_UTF_8},
 868         {C_ISO_8859_1,          CS_ISO_8859_1},
 869         {C_ISO_8859_2,          CS_ISO_8859_2},
 870         {C_ISO_8859_3,          CS_ISO_8859_3},
 871         {C_ISO_8859_4,          CS_ISO_8859_4},
 872         {C_ISO_8859_5,          CS_ISO_8859_5},
 873         {C_ISO_8859_6,          CS_ISO_8859_6},
 874         {C_ISO_8859_7,          CS_ISO_8859_7},
 875         {C_ISO_8859_8,          CS_ISO_8859_8},
 876         {C_ISO_8859_9,          CS_ISO_8859_9},
 877         {C_ISO_8859_10,         CS_ISO_8859_10},
 878         {C_ISO_8859_11,         CS_ISO_8859_11},
 879         {C_ISO_8859_13,         CS_ISO_8859_13},
 880         {C_ISO_8859_14,         CS_ISO_8859_14},
 881         {C_ISO_8859_15,         CS_ISO_8859_15},
 882         {C_BALTIC,              CS_BALTIC},
 883         {C_CP1250,              CS_CP1250},
 884         {C_CP1251,              CS_CP1251},
 885         {C_CP1252,              CS_CP1252},
 886         {C_CP1253,              CS_CP1253},
 887         {C_CP1254,              CS_CP1254},
 888         {C_CP1255,              CS_CP1255},
 889         {C_CP1256,              CS_CP1256},
 890         {C_CP1257,              CS_CP1257},
 891         {C_CP1258,              CS_CP1258},
 892         {C_WINDOWS_1250,        CS_WINDOWS_1250},
 893         {C_WINDOWS_1251,        CS_WINDOWS_1251},
 894         {C_WINDOWS_1252,        CS_WINDOWS_1252},
 895         {C_WINDOWS_1253,        CS_WINDOWS_1253},
 896         {C_WINDOWS_1254,        CS_WINDOWS_1254},
 897         {C_WINDOWS_1255,        CS_WINDOWS_1255},
 898         {C_WINDOWS_1256,        CS_WINDOWS_1256},
 899         {C_WINDOWS_1257,        CS_WINDOWS_1257},
 900         {C_WINDOWS_1258,        CS_WINDOWS_1258},
 901         {C_KOI8_R,              CS_KOI8_R},
 902         {C_KOI8_T,              CS_KOI8_T},
 903         {C_KOI8_U,              CS_KOI8_U},
 904         {C_ISO_2022_JP,         CS_ISO_2022_JP},
 905         {C_ISO_2022_JP_2,       CS_ISO_2022_JP_2},
 906         {C_EUC_JP,              CS_EUC_JP},
 907         {C_EUC_JP,              CS_EUCJP},
 908         {C_SHIFT_JIS,           CS_SHIFT_JIS},
 909         {C_SHIFT_JIS,           CS_SHIFT__JIS},
 910         {C_SHIFT_JIS,           CS_SJIS},
 911         {C_ISO_2022_KR,         CS_ISO_2022_KR},
 912         {C_EUC_KR,              CS_EUC_KR},
 913         {C_ISO_2022_CN,         CS_ISO_2022_CN},
 914         {C_EUC_CN,              CS_EUC_CN},
 915         {C_GB2312,              CS_GB2312},
 916         {C_GBK,                 CS_GBK},
 917         {C_EUC_TW,              CS_EUC_TW},
 918         {C_BIG5,                CS_BIG5},
 919         {C_BIG5_HKSCS,          CS_BIG5_HKSCS},
 920         {C_TIS_620,             CS_TIS_620},
 921         {C_WINDOWS_874,         CS_WINDOWS_874},
 922         {C_GEORGIAN_PS,         CS_GEORGIAN_PS},
 923         {C_TCVN5712_1,          CS_TCVN5712_1},
 924 };
 925
     /* Locale name -> charset table used by conv_get_current_charset()
        and conv_get_outgoing_charset().  Both scan it from the top and
        take the first entry whose locale string is a case-insensitive
        prefix of the current locale name, so entries carrying a codeset
        suffix must stay above the bare "ll_CC" entry of the same locale.
        An entry without a '.' after its '_' is also used for a current
        locale that consists of the two-letter language code only. */
 926 static const struct {
 927         gchar *const locale;    /* locale name (prefix) to match */
 928         CharSet charset;        /* result of conv_get_current_charset() */
 929         CharSet out_charset;    /* result of conv_get_outgoing_charset() */
 930 } locale_table[] = {
 931         {"ja_JP.eucJP"  , C_EUC_JP      , C_ISO_2022_JP},
 932         {"ja_JP.EUC-JP" , C_EUC_JP      , C_ISO_2022_JP},
 933         {"ja_JP.EUC"    , C_EUC_JP      , C_ISO_2022_JP},
 934         {"ja_JP.ujis"   , C_EUC_JP      , C_ISO_2022_JP},
 935         {"ja_JP.SJIS"   , C_SHIFT_JIS   , C_ISO_2022_JP},
 936         {"ja_JP.JIS"    , C_ISO_2022_JP , C_ISO_2022_JP},
 937         {"ja_JP"        , C_EUC_JP      , C_ISO_2022_JP},
 938         {"ko_KR.EUC-KR" , C_EUC_KR      , C_EUC_KR},
 939         {"ko_KR"        , C_EUC_KR      , C_EUC_KR},
 940         {"zh_CN.GB2312" , C_GB2312      , C_GB2312},
 941         {"zh_CN.GBK"    , C_GBK         , C_GB2312},
 942         {"zh_CN"        , C_GB2312      , C_GB2312},
 943         {"zh_HK"        , C_BIG5_HKSCS  , C_BIG5_HKSCS},
 944         {"zh_TW.eucTW"  , C_EUC_TW      , C_BIG5},
 945         {"zh_TW.EUC-TW" , C_EUC_TW      , C_BIG5},
 946         {"zh_TW.Big5"   , C_BIG5        , C_BIG5},
 947         {"zh_TW"        , C_BIG5        , C_BIG5},
 948
 949         {"ru_RU.KOI8-R" , C_KOI8_R      , C_KOI8_R},
 950         {"ru_RU.KOI8R"  , C_KOI8_R      , C_KOI8_R},
 951         {"ru_RU.CP1251" , C_WINDOWS_1251, C_KOI8_R},
 952         {"ru_RU"        , C_ISO_8859_5  , C_KOI8_R},
 953         {"tg_TJ"        , C_KOI8_T      , C_KOI8_T},
 954         {"ru_UA"        , C_KOI8_U      , C_KOI8_U},
 955         {"uk_UA"        , C_KOI8_U      , C_KOI8_U},
 956
 957         {"be_BY"        , C_WINDOWS_1251, C_WINDOWS_1251},
 958         {"bg_BG"        , C_WINDOWS_1251, C_WINDOWS_1251},
 959
 960         {"yi_US"        , C_WINDOWS_1255, C_WINDOWS_1255},
 961
 962         {"af_ZA"        , C_ISO_8859_1  , C_ISO_8859_1},
 963         {"br_FR"        , C_ISO_8859_1  , C_ISO_8859_1},
 964         {"ca_ES"        , C_ISO_8859_1  , C_ISO_8859_1},
 965         {"da_DK"        , C_ISO_8859_1  , C_ISO_8859_1},
 966         {"de_AT"        , C_ISO_8859_1  , C_ISO_8859_1},
 967         {"de_BE"        , C_ISO_8859_1  , C_ISO_8859_1},
 968         {"de_CH"        , C_ISO_8859_1  , C_ISO_8859_1},
 969         {"de_DE"        , C_ISO_8859_1  , C_ISO_8859_1},
 970         {"de_LU"        , C_ISO_8859_1  , C_ISO_8859_1},
 971         {"en_AU"        , C_ISO_8859_1  , C_ISO_8859_1},
 972         {"en_BW"        , C_ISO_8859_1  , C_ISO_8859_1},
 973         {"en_CA"        , C_ISO_8859_1  , C_ISO_8859_1},
 974         {"en_DK"        , C_ISO_8859_1  , C_ISO_8859_1},
 975         {"en_GB"        , C_ISO_8859_1  , C_ISO_8859_1},
 976         {"en_HK"        , C_ISO_8859_1  , C_ISO_8859_1},
 977         {"en_IE"        , C_ISO_8859_1  , C_ISO_8859_1},
 978         {"en_NZ"        , C_ISO_8859_1  , C_ISO_8859_1},
 979         {"en_PH"        , C_ISO_8859_1  , C_ISO_8859_1},
 980         {"en_SG"        , C_ISO_8859_1  , C_ISO_8859_1},
 981         {"en_US"        , C_ISO_8859_1  , C_ISO_8859_1},
 982         {"en_ZA"        , C_ISO_8859_1  , C_ISO_8859_1},
 983         {"en_ZW"        , C_ISO_8859_1  , C_ISO_8859_1},
 984         {"es_AR"        , C_ISO_8859_1  , C_ISO_8859_1},
 985         {"es_BO"        , C_ISO_8859_1  , C_ISO_8859_1},
 986         {"es_CL"        , C_ISO_8859_1  , C_ISO_8859_1},
 987         {"es_CO"        , C_ISO_8859_1  , C_ISO_8859_1},
 988         {"es_CR"        , C_ISO_8859_1  , C_ISO_8859_1},
 989         {"es_DO"        , C_ISO_8859_1  , C_ISO_8859_1},
 990         {"es_EC"        , C_ISO_8859_1  , C_ISO_8859_1},
 991         {"es_ES"        , C_ISO_8859_1  , C_ISO_8859_1},
 992         {"es_GT"        , C_ISO_8859_1  , C_ISO_8859_1},
 993         {"es_HN"        , C_ISO_8859_1  , C_ISO_8859_1},
 994         {"es_MX"        , C_ISO_8859_1  , C_ISO_8859_1},
 995         {"es_NI"        , C_ISO_8859_1  , C_ISO_8859_1},
 996         {"es_PA"        , C_ISO_8859_1  , C_ISO_8859_1},
 997         {"es_PE"        , C_ISO_8859_1  , C_ISO_8859_1},
 998         {"es_PR"        , C_ISO_8859_1  , C_ISO_8859_1},
 999         {"es_PY"        , C_ISO_8859_1  , C_ISO_8859_1},
1000         {"es_SV"        , C_ISO_8859_1  , C_ISO_8859_1},
1001         {"es_US"        , C_ISO_8859_1  , C_ISO_8859_1},
1002         {"es_UY"        , C_ISO_8859_1  , C_ISO_8859_1},
1003         {"es_VE"        , C_ISO_8859_1  , C_ISO_8859_1},
1004         {"et_EE"        , C_ISO_8859_1  , C_ISO_8859_1},
1005         {"eu_ES"        , C_ISO_8859_1  , C_ISO_8859_1},
1006         {"fi_FI"        , C_ISO_8859_1  , C_ISO_8859_1},
1007         {"fo_FO"        , C_ISO_8859_1  , C_ISO_8859_1},
1008         {"fr_BE"        , C_ISO_8859_1  , C_ISO_8859_1},
1009         {"fr_CA"        , C_ISO_8859_1  , C_ISO_8859_1},
1010         {"fr_CH"        , C_ISO_8859_1  , C_ISO_8859_1},
1011         {"fr_FR"        , C_ISO_8859_1  , C_ISO_8859_1},
1012         {"fr_LU"        , C_ISO_8859_1  , C_ISO_8859_1},
1013         {"ga_IE"        , C_ISO_8859_1  , C_ISO_8859_1},
1014         {"gl_ES"        , C_ISO_8859_1  , C_ISO_8859_1},
1015         {"gv_GB"        , C_ISO_8859_1  , C_ISO_8859_1},
1016         {"id_ID"        , C_ISO_8859_1  , C_ISO_8859_1},
1017         {"is_IS"        , C_ISO_8859_1  , C_ISO_8859_1},
1018         {"it_CH"        , C_ISO_8859_1  , C_ISO_8859_1},
1019         {"it_IT"        , C_ISO_8859_1  , C_ISO_8859_1},
1020         {"kl_GL"        , C_ISO_8859_1  , C_ISO_8859_1},
1021         {"kw_GB"        , C_ISO_8859_1  , C_ISO_8859_1},
1022         {"ms_MY"        , C_ISO_8859_1  , C_ISO_8859_1},
1023         {"nl_BE"        , C_ISO_8859_1  , C_ISO_8859_1},
1024         {"nl_NL"        , C_ISO_8859_1  , C_ISO_8859_1},
1025         {"nn_NO"        , C_ISO_8859_1  , C_ISO_8859_1},
1026         {"no_NO"        , C_ISO_8859_1  , C_ISO_8859_1},
1027         {"oc_FR"        , C_ISO_8859_1  , C_ISO_8859_1},
1028         {"pt_BR"        , C_ISO_8859_1  , C_ISO_8859_1},
1029         {"pt_PT"        , C_ISO_8859_1  , C_ISO_8859_1},
1030         {"sq_AL"        , C_ISO_8859_1  , C_ISO_8859_1},
1031         {"sv_FI"        , C_ISO_8859_1  , C_ISO_8859_1},
1032         {"sv_SE"        , C_ISO_8859_1  , C_ISO_8859_1},
1033         {"tl_PH"        , C_ISO_8859_1  , C_ISO_8859_1},
1034         {"uz_UZ"        , C_ISO_8859_1  , C_ISO_8859_1},
1035         {"wa_BE"        , C_ISO_8859_1  , C_ISO_8859_1},
1036
1037         {"bs_BA"        , C_ISO_8859_2  , C_ISO_8859_2},
1038         {"cs_CZ"        , C_ISO_8859_2  , C_ISO_8859_2},
1039         {"hr_HR"        , C_ISO_8859_2  , C_ISO_8859_2},
1040         {"hu_HU"        , C_ISO_8859_2  , C_ISO_8859_2},
1041         {"pl_PL"        , C_ISO_8859_2  , C_ISO_8859_2},
1042         {"ro_RO"        , C_ISO_8859_2  , C_ISO_8859_2},
1043         {"sk_SK"        , C_ISO_8859_2  , C_ISO_8859_2},
1044         {"sl_SI"        , C_ISO_8859_2  , C_ISO_8859_2},
1045
1046         {"sr_YU@cyrillic"       , C_ISO_8859_5  , C_ISO_8859_5},
1047         {"sr_YU"                , C_ISO_8859_2  , C_ISO_8859_2},
1048
1049         {"mt_MT"                , C_ISO_8859_3  , C_ISO_8859_3},
1050
1051         {"lt_LT.iso88594"       , C_ISO_8859_4  , C_ISO_8859_4},
1052         {"lt_LT.ISO8859-4"      , C_ISO_8859_4  , C_ISO_8859_4},
1053         {"lt_LT.ISO_8859-4"     , C_ISO_8859_4  , C_ISO_8859_4},
1054         {"lt_LT"                , C_ISO_8859_13 , C_ISO_8859_13},
1055
1056         {"mk_MK"        , C_ISO_8859_5  , C_ISO_8859_5},
1057
1058         {"ar_AE"        , C_ISO_8859_6  , C_ISO_8859_6},
1059         {"ar_BH"        , C_ISO_8859_6  , C_ISO_8859_6},
1060         {"ar_DZ"        , C_ISO_8859_6  , C_ISO_8859_6},
1061         {"ar_EG"        , C_ISO_8859_6  , C_ISO_8859_6},
1062         {"ar_IQ"        , C_ISO_8859_6  , C_ISO_8859_6},
1063         {"ar_JO"        , C_ISO_8859_6  , C_ISO_8859_6},
1064         {"ar_KW"        , C_ISO_8859_6  , C_ISO_8859_6},
1065         {"ar_LB"        , C_ISO_8859_6  , C_ISO_8859_6},
1066         {"ar_LY"        , C_ISO_8859_6  , C_ISO_8859_6},
1067         {"ar_MA"        , C_ISO_8859_6  , C_ISO_8859_6},
1068         {"ar_OM"        , C_ISO_8859_6  , C_ISO_8859_6},
1069         {"ar_QA"        , C_ISO_8859_6  , C_ISO_8859_6},
1070         {"ar_SA"        , C_ISO_8859_6  , C_ISO_8859_6},
1071         {"ar_SD"        , C_ISO_8859_6  , C_ISO_8859_6},
1072         {"ar_SY"        , C_ISO_8859_6  , C_ISO_8859_6},
1073         {"ar_TN"        , C_ISO_8859_6  , C_ISO_8859_6},
1074         {"ar_YE"        , C_ISO_8859_6  , C_ISO_8859_6},
1075
1076         {"el_GR"        , C_ISO_8859_7  , C_ISO_8859_7},
1077         {"he_IL"        , C_ISO_8859_8  , C_ISO_8859_8},
1078         {"iw_IL"        , C_ISO_8859_8  , C_ISO_8859_8},
1079         {"tr_TR"        , C_ISO_8859_9  , C_ISO_8859_9},
1080
1081         {"lv_LV"        , C_ISO_8859_13 , C_ISO_8859_13},
1082         {"mi_NZ"        , C_ISO_8859_13 , C_ISO_8859_13},
1083
1084         {"cy_GB"        , C_ISO_8859_14 , C_ISO_8859_14},
1085
1086         {"ar_IN"        , C_UTF_8       , C_UTF_8},
1087         {"en_IN"        , C_UTF_8       , C_UTF_8},
1088         {"se_NO"        , C_UTF_8       , C_UTF_8},
1089         {"ta_IN"        , C_UTF_8       , C_UTF_8},
1090         {"te_IN"        , C_UTF_8       , C_UTF_8},
1091         {"ur_PK"        , C_UTF_8       , C_UTF_8},
1092
1093         {"th_TH"        , C_TIS_620     , C_TIS_620},
1094         /* {"th_TH"     , C_WINDOWS_874}, */
1095         /* {"th_TH"     , C_ISO_8859_11}, */
1096
1097         {"ka_GE"        , C_GEORGIAN_PS , C_GEORGIAN_PS},
1098         {"vi_VN.TCVN"   , C_TCVN5712_1  , C_TCVN5712_1},
1099
1100         {"C"                    , C_US_ASCII    , C_US_ASCII},
1101         {"POSIX"                , C_US_ASCII    , C_US_ASCII},
1102         {"ANSI_X3.4-1968"       , C_US_ASCII    , C_US_ASCII},
1103 };
1104
1105 static GHashTable *conv_get_charset_to_str_table(void)
1106 {
1107         static GHashTable *table;
1108         gint i;
1109
1110         if (table)
1111                 return table;
1112
1113         table = g_hash_table_new(NULL, g_direct_equal);
1114
1115         for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1116                 if (g_hash_table_lookup(table, GUINT_TO_POINTER(charsets[i].charset))
1117                     == NULL) {
1118                         g_hash_table_insert
1119                                 (table, GUINT_TO_POINTER(charsets[i].charset),
1120                                  charsets[i].name);
1121                 }
1122         }
1123
1124         return table;
1125 }
1126
1127 static gint str_case_equal(gconstpointer v, gconstpointer v2)
1128 {
1129         return strcasecmp((const gchar *)v, (const gchar *)v2) == 0;
1130 }
1131
1132 static guint str_case_hash(gconstpointer key)
1133 {
1134         const gchar *p = key;
1135         guint h = *p;
1136
1137         if (h) {
1138                 h = tolower(h);
1139                 for (p += 1; *p != '\0'; p++)
1140                         h = (h << 5) - h + tolower(*p);
1141         }
1142
1143         return h;
1144 }
1145
1146 static GHashTable *conv_get_charset_from_str_table(void)
1147 {
1148         static GHashTable *table;
1149         gint i;
1150
1151         if (table)
1152                 return table;
1153
1154         table = g_hash_table_new(str_case_hash, str_case_equal);
1155
1156         for (i = 0; i < sizeof(charsets) / sizeof(charsets[0]); i++) {
1157                 g_hash_table_insert(table, charsets[i].name,
1158                                     GUINT_TO_POINTER(charsets[i].charset));
1159         }
1160
1161         return table;
1162 }
1163
1164 const gchar *conv_get_charset_str(CharSet charset)
1165 {
1166         GHashTable *table;
1167
1168         table = conv_get_charset_to_str_table();
1169         return g_hash_table_lookup(table, GUINT_TO_POINTER(charset));
1170 }
1171
1172 CharSet conv_get_charset_from_str(const gchar *charset)
1173 {
1174         GHashTable *table;
1175
1176         if (!charset) return C_AUTO;
1177
1178         table = conv_get_charset_from_str_table();
1179         return GPOINTER_TO_UINT(g_hash_table_lookup(table, charset));
1180 }
1181
1182 CharSet conv_get_current_charset(void)
1183 {
1184         static CharSet cur_charset = -1;
1185         const gchar *cur_locale;
1186         const gchar *p;
1187         gint i;
1188
1189         if (cur_charset != -1)
1190                 return cur_charset;
1191
1192         cur_locale = conv_get_current_locale();
1193         if (!cur_locale) {
1194                 cur_charset = C_US_ASCII;
1195                 return cur_charset;
1196         }
1197
1198         if (strcasestr(cur_locale, "UTF-8")) {
1199                 cur_charset = C_UTF_8;
1200                 return cur_charset;
1201         }
1202
1203         if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1204                 cur_charset = C_ISO_8859_15;
1205                 return cur_charset;
1206         }
1207
1208         for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1209                 const gchar *p;
1210
1211                 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1212                    "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
1213                 if (!strncasecmp(cur_locale, locale_table[i].locale,
1214                                  strlen(locale_table[i].locale))) {
1215                         cur_charset = locale_table[i].charset;
1216                         return cur_charset;
1217                 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1218                          !strchr(p + 1, '.')) {
1219                         if (strlen(cur_locale) == 2 &&
1220                             !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1221                                 cur_charset = locale_table[i].charset;
1222                                 return cur_charset;
1223                         }
1224                 }
1225         }
1226
1227         cur_charset = C_AUTO;
1228         return cur_charset;
1229 }
1230
1231 const gchar *conv_get_current_charset_str(void)
1232 {
1233         static const gchar *codeset = NULL;
1234
1235         if (!codeset)
1236                 codeset = conv_get_charset_str(conv_get_current_charset());
1237
1238         return codeset ? codeset : CS_US_ASCII;
1239 }
1240
1241 CharSet conv_get_outgoing_charset(void)
1242 {
1243         static CharSet out_charset = -1;
1244         const gchar *cur_locale;
1245         const gchar *p;
1246         gint i;
1247
1248         if (out_charset != -1)
1249                 return out_charset;
1250
1251         cur_locale = conv_get_current_locale();
1252         if (!cur_locale) {
1253                 out_charset = C_AUTO;
1254                 return out_charset;
1255         }
1256
1257         if ((p = strcasestr(cur_locale, "@euro")) && p[5] == '\0') {
1258                 out_charset = C_ISO_8859_15;
1259                 return out_charset;
1260         }
1261
1262         for (i = 0; i < sizeof(locale_table) / sizeof(locale_table[0]); i++) {
1263                 const gchar *p;
1264
1265                 if (!strncasecmp(cur_locale, locale_table[i].locale,
1266                                  strlen(locale_table[i].locale))) {
1267                         out_charset = locale_table[i].out_charset;
1268                         break;
1269                 } else if ((p = strchr(locale_table[i].locale, '_')) &&
1270                          !strchr(p + 1, '.')) {
1271                         if (strlen(cur_locale) == 2 &&
1272                             !strncasecmp(cur_locale, locale_table[i].locale, 2)) {
1273                                 out_charset = locale_table[i].out_charset;
1274                                 break;
1275                         }
1276                 }
1277         }
1278
1279 #if !HAVE_ICONV
1280         /* encoding conversion without iconv() is only supported
1281            on Japanese locale for now */
1282         if (out_charset == C_ISO_2022_JP)
1283                 return out_charset;
1284         else
1285                 return conv_get_current_charset();
1286 #endif
1287
1288         return out_charset;
1289 }
1290
1291 const gchar *conv_get_outgoing_charset_str(void)
1292 {
1293         CharSet out_charset;
1294         const gchar *str;
1295
1296         if (prefs_common.outgoing_charset) {
1297                 if (!isalpha(prefs_common.outgoing_charset[0])) {
1298                         g_free(prefs_common.outgoing_charset);
1299                         prefs_common.outgoing_charset = g_strdup(CS_AUTO);
1300                 } else if (strcmp(prefs_common.outgoing_charset, CS_AUTO) != 0)
1301                         return prefs_common.outgoing_charset;
1302         }
1303
1304         out_charset = conv_get_outgoing_charset();
1305         str = conv_get_charset_str(out_charset);
1306
1307         return str ? str : CS_US_ASCII;
1308 }
1309
1310 gboolean conv_is_multibyte_encoding(CharSet encoding)
1311 {
1312         switch (encoding) {
1313         case C_EUC_JP:
1314         case C_EUC_KR:
1315         case C_EUC_TW:
1316         case C_EUC_CN:
1317         case C_ISO_2022_JP:
1318         case C_ISO_2022_JP_2:
1319         case C_ISO_2022_KR:
1320         case C_ISO_2022_CN:
1321         case C_SHIFT_JIS:
1322         case C_GB2312:
1323         case C_BIG5:
1324         case C_UTF_8:
1325                 return TRUE;
1326         default:
1327                 return FALSE;
1328         }
1329 }
1330
1331 const gchar *conv_get_current_locale(void)
1332 {
1333         gchar *cur_locale;
1334
1335         cur_locale = g_getenv("LC_ALL");
1336         if (!cur_locale) cur_locale = g_getenv("LC_CTYPE");
1337         if (!cur_locale) cur_locale = g_getenv("LANG");
1338         if (!cur_locale) cur_locale = setlocale(LC_CTYPE, NULL);
1339
1340         debug_print("current locale: %s\n",
1341                     cur_locale ? cur_locale : "(none)");
1342
1343         return cur_locale;
1344 }
1345
1346 void conv_unmime_header_overwrite(gchar *str)
1347 {
1348         gchar *buf;
1349         gint buflen;
1350         CharSet cur_charset;
1351
1352         cur_charset = conv_get_current_charset();
1353
1354         if (cur_charset == C_EUC_JP) {
1355                 buflen = strlen(str) * 2 + 1;
1356                 Xalloca(buf, buflen, return);
1357                 conv_anytodisp(buf, buflen, str);
1358                 unmime_header(str, buf);
1359         } else {
1360                 buflen = strlen(str) + 1;
1361                 Xalloca(buf, buflen, return);
1362                 unmime_header(buf, str);
1363                 strncpy2(str, buf, buflen);
1364         }
1365 }
1366
1367 void conv_unmime_header(gchar *outbuf, gint outlen, const gchar *str,
1368                         const gchar *charset)
1369 {
1370         CharSet cur_charset;
1371
1372         cur_charset = conv_get_current_charset();
1373
1374         if (cur_charset == C_EUC_JP) {
1375                 gchar *buf;
1376                 gint buflen;
1377
1378                 buflen = strlen(str) * 2 + 1;
1379                 Xalloca(buf, buflen, return);
1380                 conv_anytodisp(buf, buflen, str);
1381                 unmime_header(outbuf, buf);
1382         } else
1383                 unmime_header(outbuf, str);
1384 }
1385
     /* Constants for conv_encode_header().
        MAX_LINELEN      : line width at which the header is folded.
        MAX_HARD_LINELEN : width at which even a single ASCII word is
                           broken.
        MIMESEP_BEGIN/END: text that opens and closes an encoded block. */
1386 #define MAX_LINELEN             76
1387 #define MAX_HARD_LINELEN        996
1388 #define MIMESEP_BEGIN           "=?"
1389 #define MIMESEP_END             "?="
1390
     /* Number of characters needed to encode len bytes when every group
        of up to three input bytes becomes four output characters. */
1391 #define B64LEN(len)     ((len) / 3 * 4 + ((len) % 3 ? 4 : 0))
1392
/* Helper for conv_encode_header(); it uses that function's locals
   dest, destp, srcp, len and left directly.

   - If fewer than MAX_LINELEN + 2 bytes remain in dest, the output is
     terminated and the enclosing function returns.
   - Otherwise, if cond holds, input remains, something has already been
     written and the current line is not a fresh one, a line break
     followed by one space is emitted and left is reset to
     MAX_LINELEN - 1.  Before breaking, a whitespace character just
     written is dropped, or else (only when is_plain_text is set) a
     whitespace character about to be read is skipped.

   The bytes handed to isspace() are cast to guchar because 8-bit header
   text would otherwise reach it as negative values. */
#define LBREAK_IF_REQUIRED(cond, is_plain_text)                         \
{                                                                       \
        if (len - (destp - dest) < MAX_LINELEN + 2) {                   \
                *destp = '\0';                                          \
                return;                                                 \
        }                                                               \
                                                                        \
        if ((cond) && *srcp) {                                          \
                if (destp > dest && left < MAX_LINELEN - 1) {           \
                        if (isspace((guchar)*(destp - 1)))              \
                                destp--;                                \
                        else if (is_plain_text &&                       \
                                 isspace((guchar)*srcp))                \
                                srcp++;                                 \
                        if (*srcp) {                                    \
                                *destp++ = '\n';                        \
                                *destp++ = ' ';                         \
                                left = MAX_LINELEN - 1;                 \
                        }                                               \
                }                                                       \
        }                                                               \
}
1414
1415 void conv_encode_header(gchar *dest, gint len, const gchar *src,
1416                         gint header_len)
1417 {
1418         const gchar *cur_encoding;
1419         const gchar *out_encoding;
1420         gint mimestr_len;
1421         gchar *mimesep_enc;
1422         gint left;
1423         const gchar *srcp = src;
1424         gchar *destp = dest;
1425         gboolean use_base64;
1426
1427         if (MB_CUR_MAX > 1) {
1428                 use_base64 = TRUE;
1429                 mimesep_enc = "?B?";
1430         } else {
1431                 use_base64 = FALSE;
1432                 mimesep_enc = "?Q?";
1433         }
1434
1435         cur_encoding = conv_get_current_charset_str();
1436         if (!strcmp(cur_encoding, CS_US_ASCII))
1437                 cur_encoding = CS_ISO_8859_1;
1438         out_encoding = conv_get_outgoing_charset_str();
1439         if (!strcmp(out_encoding, CS_US_ASCII))
1440                 out_encoding = CS_ISO_8859_1;
1441
1442         mimestr_len = strlen(MIMESEP_BEGIN) + strlen(out_encoding) +
1443                 strlen(mimesep_enc) + strlen(MIMESEP_END);
1444
1445         left = MAX_LINELEN - header_len;
1446
1447         while (*srcp) {
1448                 LBREAK_IF_REQUIRED(left <= 0, TRUE);
1449
1450                 while (isspace(*srcp)) {
1451                         *destp++ = *srcp++;
1452                         left--;
1453                         LBREAK_IF_REQUIRED(left <= 0, TRUE);
1454                 }
1455
1456                 /* output as it is if the next word is ASCII string */
1457                 if (!is_next_nonascii(srcp)) {
1458                         gint word_len;
1459
1460                         word_len = get_next_word_len(srcp);
1461                         LBREAK_IF_REQUIRED(left < word_len, TRUE);
1462                         while (word_len > 0) {
1463                                 LBREAK_IF_REQUIRED(left + (MAX_HARD_LINELEN - MAX_LINELEN) <= 0, TRUE)
1464                                 *destp++ = *srcp++;
1465                                 left--;
1466                                 word_len--;
1467                         }
1468
1469                         continue;
1470                 }
1471
1472                 while (1) {
1473                         gint mb_len = 0;
1474                         gint cur_len = 0;
1475                         gchar *part_str;
1476                         gchar *out_str;
1477                         gchar *enc_str;
1478                         const gchar *p = srcp;
1479                         gint out_str_len;
1480                         gint out_enc_str_len;
1481                         gint mime_block_len;
1482                         gboolean cont = FALSE;
1483
1484                         while (*p != '\0') {
1485                                 if (isspace(*p) && !is_next_nonascii(p + 1))
1486                                         break;
1487
1488                                 if (MB_CUR_MAX > 1) {
1489                                         mb_len = mblen(p, MB_CUR_MAX);
1490                                         if (mb_len < 0) {
1491                                                 g_warning("conv_encode_header(): invalid multibyte character encountered\n");
1492                                                 mb_len = 1;
1493                                         }
1494                                 } else
1495                                         mb_len = 1;
1496
1497                                 Xstrndup_a(part_str, srcp, cur_len + mb_len, );
1498                                 out_str = conv_codeset_strdup
1499                                         (part_str, cur_encoding, out_encoding);
1500                                 if (!out_str) {
1501                                         g_warning("conv_encode_header(): code conversion failed\n");
1502                                         conv_unreadable_8bit(part_str);
1503                                         out_str = g_strdup(part_str);
1504                                 }
1505                                 out_str_len = strlen(out_str);
1506
1507                                 if (use_base64)
1508                                         out_enc_str_len = B64LEN(out_str_len);
1509                                 else
1510                                         out_enc_str_len =
1511                                                 qp_get_q_encoding_len(out_str);
1512
1513                                 g_free(out_str);
1514
1515                                 if (mimestr_len + out_enc_str_len <= left) {
1516                                         cur_len += mb_len;
1517                                         p += mb_len;
1518                                 } else if (cur_len == 0) {
1519                                         LBREAK_IF_REQUIRED(1, FALSE);
1520                                         continue;
1521                                 } else {
1522                                         cont = TRUE;
1523                                         break;
1524                                 }
1525                         }
1526
1527                         if (cur_len > 0) {
1528                                 Xstrndup_a(part_str, srcp, cur_len, );
1529                                 out_str = conv_codeset_strdup
1530                                         (part_str, cur_encoding, out_encoding);
1531                                 if (!out_str) {
1532                                         g_warning("conv_encode_header(): code conversion failed\n");
1533                                         conv_unreadable_8bit(part_str);
1534                                         out_str = g_strdup(part_str);
1535                                 }
1536                                 out_str_len = strlen(out_str);
1537
1538                                 if (use_base64)
1539                                         out_enc_str_len = B64LEN(out_str_len);
1540                                 else
1541                                         out_enc_str_len =
1542                                                 qp_get_q_encoding_len(out_str);
1543
1544                                 Xalloca(enc_str, out_enc_str_len + 1, );
1545                                 if (use_base64)
1546                                         base64_encode(enc_str, out_str, out_str_len);
1547                                 else
1548                                         qp_q_encode(enc_str, out_str);
1549
1550                                 g_free(out_str);
1551
1552                                 /* output MIME-encoded string block */
1553                                 mime_block_len = mimestr_len + strlen(enc_str);
1554                                 g_snprintf(destp, mime_block_len + 1,
1555                                            MIMESEP_BEGIN "%s%s%s" MIMESEP_END,
1556                                            out_encoding, mimesep_enc, enc_str);
1557                                 destp += mime_block_len;
1558                                 srcp += cur_len;
1559
1560                                 left -= mime_block_len;
1561                         }
1562
1563                         LBREAK_IF_REQUIRED(cont, FALSE);
1564
1565                         if (cur_len == 0)
1566                                 break;
1567                 }
1568         }
1569
1570         *destp = '\0';
1571 }
1572
1573 #undef LBREAK_IF_REQUIRED