static gint conv_jistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
{
const guchar *in = inbuf;
- guchar *out = outbuf;
+ gchar *out = outbuf;
JISState state = JIS_ASCII;
- while (*in != '\0') {
+ /*
+ * Loop outputs up to 3 bytes in each pass (aux kanji) and we
+ * need 1 byte to terminate the output
+ */
+ while (*in != '\0' && (out - outbuf) < outlen - 4) {
if (*in == ESC) {
in++;
if (*in == '$') {
static gint conv_euctojis(gchar *outbuf, gint outlen, const gchar *inbuf)
{
const guchar *in = inbuf;
- guchar *out = outbuf;
+ gchar *out = outbuf;
JISState state = JIS_ASCII;
- while (*in != '\0') {
+ /*
+ * Loop outputs up to 6 bytes in each pass (aux shift + aux
+ * kanji) and we need up to 4 bytes to terminate the output
+ * (ASCII shift + null)
+ */
+ while (*in != '\0' && (out - outbuf) < outlen - 10) {
if (IS_ASCII(*in)) {
K_OUT();
*out++ = *in++;
static gint conv_sjistoeuc(gchar *outbuf, gint outlen, const gchar *inbuf)
{
const guchar *in = inbuf;
- guchar *out = outbuf;
+ gchar *out = outbuf;
- while (*in != '\0') {
+ /*
+ * Loop outputs up to 2 bytes in each pass and we need 1 byte
+ * to terminate the output
+ */
+ while (*in != '\0' && (out - outbuf) < outlen - 3) {
if (IS_ASCII(*in)) {
*out++ = *in++;
} else if (issjiskanji1(*in)) {
if (cd == (iconv_t)-1) {
cd = iconv_open(CS_UTF_8, CS_EUC_JP);
if (cd == (iconv_t)-1) {
- g_warning("conv_euctoutf8(): %s\n",
+ g_warning("conv_euctoutf8(): %s",
g_strerror(errno));
iconv_ok = FALSE;
strncpy2(outbuf, inbuf, outlen);
if (cd == (iconv_t)-1) {
cd = iconv_open(CS_EUC_JP, CS_UTF_8);
if (cd == (iconv_t)-1) {
- g_warning("conv_utf8toeuc(): %s\n",
+ g_warning("conv_utf8toeuc(): %s",
g_strerror(errno));
iconv_ok = FALSE;
strncpy2(outbuf, inbuf, outlen);
{
if (encoding && (encoding[0] == 'X' || encoding[0] == 'x') &&
encoding[1] == '-') {
+ if (!g_ascii_strcasecmp(encoding, CS_X_MACCYR))
+ return CS_MACCYR;
if (!g_ascii_strcasecmp(encoding, CS_X_GBK))
return CS_GBK;
}
size_t len;
CodeConvFunc conv_func;
- if (!strcmp2(src_code, dest_code))
+ if (!strcmp2(src_code, dest_code)) {
+ CharSet dest_charset = conv_get_charset_from_str(dest_code);
+ if (strict_mode && dest_charset == C_UTF_8) {
+ /* ensure valid UTF-8 if target is UTF-8 */
+ if (!g_utf8_validate(inbuf, -1, NULL)) {
+ return NULL;
+ }
+ }
+ /* otherwise return a copy of the input as-is, without validating it */
return g_strdup(inbuf);
+ }
src_code = conv_get_fallback_for_private_encoding(src_code);
conv_func = conv_get_code_conv_func(src_code, dest_code);
} else if (E2BIG == errno) {
EXPAND_BUF();
} else {
- g_warning("conv_iconv_strdup(): %s\n",
+ g_warning("conv_iconv_strdup(): %s",
g_strerror(errno));
break;
}
if (E2BIG == errno) {
EXPAND_BUF();
} else {
- g_warning("conv_iconv_strdup(): %s\n",
+ g_warning("conv_iconv_strdup(): %s",
g_strerror(errno));
break;
}
{C_WINDOWS_1257, CS_WINDOWS_1257},
{C_WINDOWS_1258, CS_WINDOWS_1258},
{C_KOI8_R, CS_KOI8_R},
+ {C_MACCYR, CS_MACCYR},
{C_KOI8_T, CS_KOI8_T},
{C_KOI8_U, CS_KOI8_U},
{C_ISO_2022_JP, CS_ISO_2022_JP},
left = MAX_LINELEN - 1; \
} \
} else if (destp == (guchar *)dest && left < 7) { \
- if (isspace(*(destp - 1))) \
- destp--; \
- else if (is_plain_text && isspace(*srcp)) \
+ if (is_plain_text && isspace(*srcp)) \
srcp++; \
if (*srcp) { \
*destp++ = '\n'; \
*dest = '\0';
return;
} else {
- g_warning("conv_encode_header(): code conversion failed\n");
+ g_warning("conv_encode_header(): code conversion failed");
conv_unreadable_8bit(part_str);
out_str = g_strdup(part_str);
}
out_str = conv_codeset_strdup
(part_str, cur_encoding, out_encoding);
if (!out_str) {
- g_warning("conv_encode_header(): code conversion failed\n");
+ g_warning("conv_encode_header(): code conversion failed");
conv_unreadable_8bit(part_str);
out_str = g_strdup(part_str);
}
utf8_file = g_filename_to_utf8(fs_file, -1, NULL, NULL, &error);
if (error) {
- g_warning("failed to convert encoding of file name: %s\n",
+ g_warning("failed to convert encoding of file name: %s",
error->message);
g_error_free(error);
}