#include "claws-features.h"
#endif
-#include "defs.h"
#include "utils.h"
-#include "entity.h"
/*
 * Size of the buffer that receives an entity name, including the
 * terminating NUL.  A name whose length reaches this value is rejected,
 * so it must be at least one more than the longest key in
 * symbolic_entities ("thetasym", 8 characters).
 */
#define ENTITY_MAX_LEN 9
/*
 * Largest number of bytes g_unichar_to_utf8() may write for a single
 * character; the decode buffer adds one more byte for the NUL.
 */
#define DECODED_MAX_LEN 6
gchar *const value;
};
+/* in alphabetical order with upper-case version first */
static EntitySymbol symbolic_entities[] = {
- /* in alphabetical order with upper-case version first */
- {"Aacute", "\303\201"},
- {"aacute", "\303\241"},
- {"Acirc", "\303\202"},
- {"acirc", "\303\242"},
- {"acute", "\302\264"},
- {"AElig", "\303\206"},
- {"aelig", "\303\246"},
- {"Agrave", "\303\200"},
- {"agrave", "\303\240"},
- {"amp", "&" },
- {"apos", "'" },
- {"Aring", "\303\205"},
- {"aring", "\303\245"},
- {"Atilde", "\303\203"},
- {"atilde", "\303\243"},
- {"Auml", "\303\204"},
- {"auml", "\303\244"},
- {"bdquo", "\342\200\236"},
- {"brvbar", "\302\246"},
- {"bull", "\342\200\242"},
- {"Ccedil", "\303\207"},
- {"ccedil", "\303\247"},
- {"cedil", "\302\270"},
- {"cent", "\302\242"},
- {"circ", "\313\206"},
- {"copy", "©" },
- {"curren", "\302\244"},
- {"dagger", "\342\200\240"},
- {"Dagger", "\342\200\241"},
- {"deg", "\302\260"},
- {"divide", "\303\267"},
- {"Eacute", "\303\211"},
- {"eacute", "\303\251"},
- {"Ecirc", "\303\212"},
- {"ecirc", "\303\252"},
- {"Egrave", "\303\210"},
- {"egrave", "\303\250"},
- {"emsp", "\342\200\203"},
- {"ensp", "\342\200\202"},
- {"ETH", "\303\220"},
- {"eth", "\303\260"},
- {"Euml", "\303\213"},
- {"euml", "\303\253"},
- {"euro", "€" },
- {"frac12", "\302\275"},
- {"frac14", "\302\274"},
- {"frac34", "\302\276"},
- {"gt", ">" },
- {"hellip", "…" },
- {"Iacute", "\303\215"},
- {"iacute", "\303\255"},
- {"Icirc", "\303\216"},
- {"icirc", "\303\256"},
- {"iexcl", "\302\241"},
- {"Igrave", "\303\214"},
- {"igrave", "\303\254"},
- {"iquest", "\302\277"},
- {"Iuml", "\303\217"},
- {"iuml", "\303\257"},
- {"laquo", "\302\253"},
- {"ldquo", "“" },
- {"lsaquo", "\342\200\271"},
- {"lsquo", "‘" },
- {"lt", "<" },
- {"macr", "\302\257"},
- {"mdash", "—" },
- {"micro", "\302\265"},
- {"middot", "\302\267"},
- {"nbsp", " " },
- {"ndash", "\342\200\223"},
- {"not", "\302\254"},
- {"Ntilde", "\303\221"},
- {"ntilde", "\303\261"},
- {"Oacute", "\303\223"},
- {"oacute", "\303\263"},
- {"Ocirc", "\303\224"},
- {"ocirc", "\303\264"},
- {"OElig", "\305\222"},
- {"oelig", "\305\223"},
- {"Ograve", "\303\222"},
- {"ograve", "\303\262"},
- {"ordf", "\302\252"},
- {"ordm", "\302\272"},
- {"Oslash", "\303\230"},
- {"oslash", "\303\270"},
- {"Otilde", "\303\225"},
- {"otilde", "\303\265"},
- {"Ouml", "\303\226"},
- {"ouml", "\303\266"},
- {"para", "\302\266"},
- {"permil", "\342\200\260"},
- {"plusmn", "\302\261"},
- {"pound", "\302\243"},
- {"quot", "\"" },
- {"raquo", "\302\273"},
- {"rdquo", "”" },
- {"reg", "®" },
- {"rsaquo", "\342\200\272"},
- {"rsquo", "’" },
- {"sbquo", "\342\200\232"},
- {"Scaron", "\305\240"},
- {"scaron", "\305\241"},
- {"sect", "\302\247"},
- {"shy", "\302\255"},
- {"squot", "\47"},
- {"sup1", "\302\271"},
- {"sup2", "\302\262"},
- {"sup3", "\302\263"},
- {"szlig", "\303\237"},
- {"thinsp", "\342\200\211"},
- {"THORN", "\303\236"},
- {"thorn", "\303\276"},
- {"tilde", "\313\234"},
- {"times", "\303\227"},
- {"trade", "™" },
- {"Uacute", "\303\232"},
- {"uacute", "\303\272"},
- {"Ucirc", "\303\233"},
- {"ucirc", "\303\273"},
- {"Ugrave", "\303\231"},
- {"ugrave", "\303\271"},
- {"uml", "\302\250"},
- {"Uuml", "\303\234"},
- {"uuml", "\303\274"},
- {"Yacute", "\303\235"},
- {"yacute", "\303\275"},
- {"yen", "\302\245"},
- {"yuml", "\303\277"},
- {"Yuml", "\305\270"},
+ /*
+  * Each entry maps a named character reference (the text between '&'
+  * and ';') to its replacement string encoded as UTF-8.  The final
+  * {NULL, NULL} entry terminates the table.
+  */
+ /* A */
+ {"Aacute", "Á"},
+ {"aacute", "á"},
+ {"Acirc", "Â"},
+ {"acirc", "â"},
+ {"acute", "´"},
+ {"AElig", "Æ"},
+ {"aelig", "æ"},
+ {"Agrave", "À"},
+ {"agrave", "à"},
+ {"alefsym", "ℵ"},
+ {"Alpha", "Α"},
+ {"alpha", "α"},
+ {"amp", "&"},
+ {"and", "∧"},
+ {"ang", "∠"},
+ {"apos", "'"},
+ {"Aring", "Å"},
+ {"aring", "å"},
+ {"asymp", "≈"},
+ {"Atilde", "Ã"},
+ {"atilde", "ã"},
+ {"Auml", "Ä"},
+ {"auml", "ä"},
+ /* B */
+ {"bdquo", "„"},
+ {"Beta", "Β"},
+ {"beta", "β"},
+ {"brvbar", "¦"},
+ {"bull", "•"},
+ /* C */
+ {"cap", "∩"},
+ {"Ccedil", "Ç"},
+ {"ccedil", "ç"},
+ {"cedil", "¸"},
+ {"cent", "¢"},
+ {"Chi", "Χ"},
+ {"chi", "χ"},
+ {"circ", "ˆ"},
+ {"clubs", "♣"},
+ {"cong", "≅"},
+ {"copy", "©"},
+ {"crarr", "↵"},
+ {"cup", "∪"},
+ {"curren", "¤"},
+ /* D */
+ {"Dagger", "‡"},
+ {"dagger", "†"},
+ {"dArr", "⇓"},
+ {"darr", "↓"},
+ {"deg", "°"},
+ {"Delta", "Δ"},
+ {"delta", "δ"},
+ {"diams", "♦"},
+ {"divide", "÷"},
+ /* E */
+ {"Eacute", "É"},
+ {"eacute", "é"},
+ {"Ecirc", "Ê"},
+ {"ecirc", "ê"},
+ {"Egrave", "È"},
+ {"egrave", "è"},
+ {"empty", "∅"},
+ {"emsp", "\xE2\x80\x83"},
+ {"ensp", "\xE2\x80\x82"},
+ {"Epsilon", "Ε"},
+ {"epsilon", "ε"},
+ {"equiv", "≡"},
+ {"Eta", "Η"},
+ {"eta", "η"},
+ {"ETH", "Ð"},
+ {"eth", "ð"},
+ {"Euml", "Ë"},
+ {"euml", "ë"},
+ {"euro", "€"},
+ {"exist", "∃"},
+ /* F */
+ {"fnof", "ƒ"},
+ {"forall", "∀"},
+ {"frac12", "½"},
+ {"frac14", "¼"},
+ {"frac34", "¾"},
+ {"frasl", "⁄"},
+ /* G */
+ {"Gamma", "Γ"},
+ {"gamma", "γ"},
+ {"ge", "≥"},
+ {"gt", ">"},
+ /* H */
+ {"hArr", "⇔"},
+ {"harr", "↔"},
+ {"hearts", "♥"},
+ {"hellip", "…"},
+ /* I */
+ {"Iacute", "Í"},
+ {"iacute", "í"},
+ {"Icirc", "Î"},
+ {"icirc", "î"},
+ {"iexcl", "¡"},
+ {"Igrave", "Ì"},
+ {"igrave", "ì"},
+ {"image", "ℑ"},
+ {"infin", "∞"},
+ {"int", "∫"},
+ {"Iota", "Ι"},
+ {"iota", "ι"},
+ {"iquest", "¿"},
+ {"isin", "∈"},
+ {"Iuml", "Ï"},
+ {"iuml", "ï"},
+ /* K */
+ {"Kappa", "Κ"},
+ {"kappa", "κ"},
+ /* L */
+ {"Lambda", "Λ"},
+ {"lambda", "λ"},
+ {"lang", "〈"},
+ {"laquo", "«"},
+ /* "lArr" starts with a lower-case L, not an upper-case i */
+ {"lArr", "⇐"},
+ {"larr", "←"},
+ {"lceil", "⌈"},
+ {"ldquo", "“"},
+ {"le", "≤"},
+ {"lfloor", "⌊"},
+ {"lowast", "∗"},
+ {"loz", "◊"},
+ {"lrm", "\xE2\x80\x8E"},
+ {"lsaquo", "‹"},
+ {"lsquo", "‘"},
+ {"lt", "<"},
+ /* M */
+ {"macr", "¯"},
+ {"mdash", "—"},
+ {"micro", "µ"},
+ {"middot", "·"},
+ {"minus", "−"},
+ {"Mu", "Μ"},
+ {"mu", "μ"},
+ /* N */
+ {"nabla", "∇"},
+ {"nbsp", "\xC2\xA0"},
+ {"ndash", "–"},
+ {"ne", "≠"},
+ {"ni", "∋"},
+ {"not", "¬"},
+ {"notin", "∉"},
+ {"nsub", "⊄"},
+ {"Ntilde", "Ñ"},
+ {"ntilde", "ñ"},
+ {"Nu", "Ν"},
+ {"nu", "ν"},
+ /* O */
+ {"Oacute", "Ó"},
+ {"oacute", "ó"},
+ {"Ocirc", "Ô"},
+ {"ocirc", "ô"},
+ {"OElig", "Œ"},
+ {"oelig", "œ"},
+ {"Ograve", "Ò"},
+ {"ograve", "ò"},
+ {"oline", "‾"},
+ {"Omega", "Ω"},
+ {"omega", "ω"},
+ {"Omicron", "Ο"},
+ {"omicron", "ο"},
+ {"oplus", "⊕"},
+ {"or", "∨"},
+ {"ordf", "ª"},
+ {"ordm", "º"},
+ {"Oslash", "Ø"},
+ {"oslash", "ø"},
+ {"Otilde", "Õ"},
+ {"otilde", "õ"},
+ {"otimes", "⊗"},
+ {"Ouml", "Ö"},
+ {"ouml", "ö"},
+ /* P */
+ {"para", "¶"},
+ {"part", "∂"},
+ {"permil", "‰"},
+ {"perp", "⊥"},
+ {"Phi", "Φ"},
+ {"phi", "φ"},
+ {"Pi", "Π"},
+ {"pi", "π"},
+ {"piv", "ϖ"},
+ {"plusmn", "±"},
+ {"pound", "£"},
+ {"Prime", "″"},
+ {"prime", "′"},
+ {"prod", "∏"},
+ {"prop", "∝"},
+ {"Psi", "Ψ"},
+ {"psi", "ψ"},
+ /* Q */
+ {"quot", "\""},
+ /* R */
+ {"radic", "√"},
+ {"rang", "〉"},
+ {"raquo", "»"},
+ {"rArr", "⇒"},
+ {"rarr", "→"},
+ {"rceil", "⌉"},
+ {"rdquo", "”"},
+ {"real", "ℜ"},
+ {"reg", "®"},
+ {"rfloor", "⌋"},
+ {"Rho", "Ρ"},
+ {"rho", "ρ"},
+ {"rlm", "\xE2\x80\x8F"},
+ {"rsaquo", "›"},
+ {"rsquo", "’"},
+ /* S */
+ {"sbquo", "‚"},
+ {"Scaron", "Š"},
+ {"scaron", "š"},
+ {"sdot", "⋅"},
+ {"sect", "§"},
+ {"shy", "\xC2\xAD"},
+ {"Sigma", "Σ"},
+ {"sigma", "σ"},
+ {"sigmaf", "ς"},
+ {"sim", "∼"},
+ {"spades", "♠"},
+ {"sub", "⊂"},
+ {"sube", "⊆"},
+ {"sum", "∑"},
+ {"sup", "⊃"},
+ {"sup1", "¹"},
+ {"sup2", "²"},
+ {"sup3", "³"},
+ {"supe", "⊇"},
+ {"szlig", "ß"},
+ /* T */
+ {"Tau", "Τ"},
+ {"tau", "τ"},
+ {"there4", "∴"},
+ {"Theta", "Θ"},
+ {"theta", "θ"},
+ {"thetasym", "ϑ"},
+ {"thinsp", "\xE2\x80\x89"},
+ {"THORN", "Þ"},
+ {"thorn", "þ"},
+ {"tilde", "˜"},
+ {"times", "×"},
+ {"trade", "™"},
+ /* U */
+ {"Uacute", "Ú"},
+ {"uacute", "ú"},
+ {"uArr", "⇑"},
+ {"uarr", "↑"},
+ {"Ucirc", "Û"},
+ {"ucirc", "û"},
+ {"Ugrave", "Ù"},
+ {"ugrave", "ù"},
+ {"uml", "¨"},
+ {"upsih", "ϒ"},
+ {"Upsilon", "Υ"},
+ {"upsilon", "υ"},
+ {"Uuml", "Ü"},
+ {"uuml", "ü"},
+ /* W */
+ {"weierp", "℘"},
+ /* X */
+ {"Xi", "Ξ"},
+ {"xi", "ξ"},
+ /* Y */
+ {"Yacute", "Ý"},
+ {"yacute", "ý"},
+ {"yen", "¥"},
+ {"Yuml", "Ÿ"},
+ {"yuml", "ÿ"},
+ /* Z */
+ {"Zeta", "Ζ"},
+ {"zeta", "ζ"},
+ {"zwj", "\xE2\x80\x8D"},
+ {"zwnj", "\xE2\x80\x8C"},
 {NULL, NULL}
};
b[i] = *p;
++i, ++p;
}
- if (*p != ';' || i == ENTITY_MAX_LEN)
+ if (*p != ';' || i == 0 || i == ENTITY_MAX_LEN)
return NULL;
b[i] = '\0';
gchar b[ENTITY_MAX_LEN];
gchar *p = str, *res;
gboolean hex = FALSE;
- gunichar c;
+ gunichar c = 0;
+ gint ret;
++p;
if (*p == '\0')
if (entity_extract_to_buffer (p, b) == NULL)
return NULL;
- c = g_ascii_strtoll (b, NULL, (hex? 16: 10));
+ if (strlen(b) > 0)
+ c = g_ascii_strtoll (b, NULL, (hex ? 16 : 10));
+
+ if (c < 32)
+ /* An unprintable character; return the Unicode replacement symbol */
+ return g_strdup("\xef\xbf\xbd");
+
+ if (!g_unichar_validate(c)) {
+ /* Make sure the character is valid Unicode */
+ debug_print("Numeric reference '&#%s;' is invalid in Unicode codespace\n", b);
+ return NULL;
+ }
+
res = g_malloc0 (DECODED_MAX_LEN + 1);
- g_unichar_to_utf8 (c, res);
+ ret = g_unichar_to_utf8 (c, res);
+ if (ret == 0) {
+ debug_print("Failed to convert unicode character %u to UTF-8\n", c);
+ g_free(res);
+ res = NULL;
+ }
return res;
}