2 * Claws Mail -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 2017 Ricardo Mones and the Claws Mail team
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 #include "claws-features.h"
/* Size of the stack buffers that receive an extracted entity name or number
 * (the gchar b[ENTITY_MAX_LEN] locals) and the scan limit used while looking
 * for the terminating semicolon. */
25 #define ENTITY_MAX_LEN 8
/* The numeric decoder allocates DECODED_MAX_LEN + 1 bytes for the output of
 * g_unichar_to_utf8(). */
26 #define DECODED_MAX_LEN 6
/* Lookup table for symbolic entities. Starts as NULL and is created by
 * entity_decode_symbol() the first time it finds the pointer still NULL. */
28 static GHashTable *symbol_table = NULL;
/* Element type of the symbolic_entities table; the code in this file reads
 * its key and value members. */
30 typedef struct _EntitySymbol EntitySymbol;
/* Table of symbolic entities. Each entry pairs an entity name, written
 * without the leading ampersand and the trailing semicolon, with the bytes
 * it decodes to. The values are UTF-8 sequences given as hex escapes; for
 * example nbsp maps to C2 A0, the UTF-8 encoding of U+00A0.
 * NOTE(review): the loop that loads this table stops at an entry whose key
 * is NULL, so the array presumably ends with such a sentinel. That entry is
 * not visible in this excerpt; confirm. */
38 /* in alphabetical order with upper-case version first */
39 static EntitySymbol symbolic_entities[] = {
103 {"emsp", "\xE2\x80\x83"},
104 {"ensp", "\xE2\x80\x82"},
166 {"lrm", "\xE2\x80\x8E"},
180 {"nbsp", "\xC2\xA0"},
249 {"rlm", "\xE2\x80\x8F"},
280 {"thinsp", "\xE2\x80\x89"},
315 {"zwj", "\xE2\x80\x8D"},
316 {"zwnj", "\xE2\x80\x8C"},
/* Scan the text at p up to its terminating semicolon, presumably copying the
 * scanned characters into b (the loop body is not visible in this excerpt).
 * Callers pass a buffer of ENTITY_MAX_LEN bytes and treat a NULL result as
 * failure. */
320 static gchar* entity_extract_to_buffer(gchar *p, gchar b[])
/* Stop at the end of the string, at a semicolon, or once i reaches
 * ENTITY_MAX_LEN. */
324 while (*p != '\0' && *p != ';' && i < ENTITY_MAX_LEN) {
/* Condition is true when no semicolon ended the scan, when nothing was
 * scanned, or when the limit was reached, so at most ENTITY_MAX_LEN - 1
 * characters pass this test. Presumably this is the rejection path; the
 * statement it guards is not visible here. */
328 if (*p != ';' || i == 0 || i == ENTITY_MAX_LEN)
/* Decode a numeric character reference into UTF-8.
 * The visible return path hands back a g_strdup() copy, and the converted
 * result is built in a g_malloc0() buffer, so the caller presumably owns the
 * returned string and releases it with g_free(); confirm against callers.
 * Several statements of this function are not visible in this excerpt. */
335 static gchar *entity_decode_numeric(gchar *str)
/* Receives the digit text extracted from the reference. */
337 gchar b[ENTITY_MAX_LEN];
338 gchar *p = str, *res;
/* Selects base 16 instead of base 10 for the parse below; where it is set
 * is not visible here. */
339 gboolean hex = FALSE;
/* Extraction reports failure with NULL. */
354 if (entity_extract_to_buffer (p, b) == NULL)
/* NOTE(review): the end pointer argument is NULL, so this call cannot report
 * trailing characters in b that are not valid digits for the chosen base. */
358 c = g_ascii_strtoll (b, NULL, (hex ? 16 : 10));
361 /* An unprintable character; return the Unicode replacement symbol */
/* EF BF BD is the UTF-8 encoding of U+FFFD. */
362 return g_strdup("\xef\xbf\xbd");
364 if (!g_unichar_validate(c)) {
365 /* Make sure the character is valid Unicode */
366 debug_print("Numeric reference '&#%s;' is invalid in Unicode codespace\n", b);
/* Zero-filled allocation, one byte larger than DECODED_MAX_LEN, so the
 * buffer remains NUL-terminated after the conversion writes its bytes. */
370 res = g_malloc0 (DECODED_MAX_LEN + 1);
/* ret receives the number of bytes g_unichar_to_utf8() wrote. */
371 ret = g_unichar_to_utf8 (c, res);
373 debug_print("Failed to convert unicode character %u to UTF-8\n", c);
/* Decode a symbolic entity by looking its name up in symbol_table and
 * returning a g_strdup() copy of the mapped value.
 * Several statements of this function are not visible in this excerpt. */
381 static gchar *entity_decode_symbol(gchar *str)
/* Receives the entity name extracted from str. */
383 gchar b[ENTITY_MAX_LEN];
/* Extraction reports failure with NULL. */
386 if (entity_extract_to_buffer (str, b) == NULL)
/* Build the lookup table only while the global pointer is still NULL. */
389 if (symbol_table == NULL) {
/* String-keyed table created without destroy callbacks; the keys and values
 * inserted below are the strings held by symbolic_entities. */
392 symbol_table = g_hash_table_new (g_str_hash, g_str_equal);
/* Walk the static table until an entry with a NULL key is reached. */
393 for (i = 0; symbolic_entities[i].key != NULL; ++i) {
394 g_hash_table_insert (symbol_table,
395 symbolic_entities[i].key, symbolic_entities[i].value);
/* i now holds the number of entries inserted. */
397 debug_print("initialized entities table with %d symbols\n", i);
/* g_hash_table_lookup() yields NULL when the name is not in the table. */
400 decoded = g_hash_table_lookup (symbol_table, b);
/* Return a copy rather than the pointer stored in the table. */
402 return g_strdup (decoded);
/* Non-static entry point of this file: decode the entity reference in str
 * by delegating to the numeric or the symbolic decoder.
 * NOTE(review): the test that chooses between the two decoders, and how p is
 * derived from str, are not visible in this excerpt. */
407 gchar *entity_decode(gchar *str)
/* Input guard: p must be non-NULL and must point at an ampersand. */
410 if (p == NULL || *p != '&')
416 return entity_decode_numeric(p);
418 return entity_decode_symbol(p);