From 1285ad7143f2aa0cf1fc3b6bad8af828d49ffb4c Mon Sep 17 00:00:00 2001 From: Ricardo Mones Date: Mon, 13 Nov 2017 01:31:44 +0100 Subject: [PATCH 1/1] Complete, normalize and fix table of entities https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references --- src/entity.c | 408 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 278 insertions(+), 130 deletions(-) diff --git a/src/entity.c b/src/entity.c index cc7229168..98a01e4c8 100644 --- a/src/entity.c +++ b/src/entity.c @@ -37,137 +37,285 @@ struct _EntitySymbol gchar *const value; }; +/* in alphabetical order with upper-case version first */ static EntitySymbol symbolic_entities[] = { - /* in alphabetical order with upper-case version first */ - {"Aacute", "\303\201"}, - {"aacute", "\303\241"}, - {"Acirc", "\303\202"}, - {"acirc", "\303\242"}, - {"acute", "\302\264"}, - {"AElig", "\303\206"}, - {"aelig", "\303\246"}, - {"Agrave", "\303\200"}, - {"agrave", "\303\240"}, - {"amp", "&" }, - {"apos", "'" }, - {"Aring", "\303\205"}, - {"aring", "\303\245"}, - {"Atilde", "\303\203"}, - {"atilde", "\303\243"}, - {"Auml", "\303\204"}, - {"auml", "\303\244"}, - {"bdquo", "\342\200\236"}, - {"brvbar", "\302\246"}, - {"bull", "\342\200\242"}, - {"Ccedil", "\303\207"}, - {"ccedil", "\303\247"}, - {"cedil", "\302\270"}, - {"cent", "\302\242"}, - {"circ", "\313\206"}, - {"copy", "©" }, - {"curren", "\302\244"}, - {"dagger", "\342\200\240"}, - {"Dagger", "\342\200\241"}, - {"deg", "\302\260"}, - {"divide", "\303\267"}, - {"Eacute", "\303\211"}, - {"eacute", "\303\251"}, - {"Ecirc", "\303\212"}, - {"ecirc", "\303\252"}, - {"Egrave", "\303\210"}, - {"egrave", "\303\250"}, - {"emsp", "\342\200\203"}, - {"ensp", "\342\200\202"}, - {"ETH", "\303\220"}, - {"eth", "\303\260"}, - {"Euml", "\303\213"}, - {"euml", "\303\253"}, - {"euro", "€" }, - {"frac12", "\302\275"}, - {"frac14", "\302\274"}, - {"frac34", "\302\276"}, - {"gt", ">" }, - {"hellip", "…" }, - {"Iacute", "\303\215"}, - {"iacute", 
"\303\255"}, - {"Icirc", "\303\216"}, - {"icirc", "\303\256"}, - {"iexcl", "\302\241"}, - {"Igrave", "\303\214"}, - {"igrave", "\303\254"}, - {"iquest", "\302\277"}, - {"Iuml", "\303\217"}, - {"iuml", "\303\257"}, - {"laquo", "\302\253"}, - {"ldquo", "“" }, - {"lsaquo", "\342\200\271"}, - {"lsquo", "‘" }, - {"lt", "<" }, - {"macr", "\302\257"}, - {"mdash", "—" }, - {"micro", "\302\265"}, - {"middot", "\302\267"}, - {"nbsp", " " }, - {"ndash", "\342\200\223"}, - {"not", "\302\254"}, - {"Ntilde", "\303\221"}, - {"ntilde", "\303\261"}, - {"Oacute", "\303\223"}, - {"oacute", "\303\263"}, - {"Ocirc", "\303\224"}, - {"ocirc", "\303\264"}, - {"OElig", "\305\222"}, - {"oelig", "\305\223"}, - {"Ograve", "\303\222"}, - {"ograve", "\303\262"}, - {"ordf", "\302\252"}, - {"ordm", "\302\272"}, - {"Oslash", "\303\230"}, - {"oslash", "\303\270"}, - {"Otilde", "\303\225"}, - {"otilde", "\303\265"}, - {"Ouml", "\303\226"}, - {"ouml", "\303\266"}, - {"para", "\302\266"}, - {"permil", "\342\200\260"}, - {"plusmn", "\302\261"}, - {"pound", "\302\243"}, - {"quot", "\"" }, - {"raquo", "\302\273"}, - {"rdquo", "”" }, - {"reg", "®" }, - {"rsaquo", "\342\200\272"}, - {"rsquo", "’" }, - {"sbquo", "\342\200\232"}, - {"Scaron", "\305\240"}, - {"scaron", "\305\241"}, - {"sect", "\302\247"}, - {"shy", "\302\255"}, - {"squot", "\47"}, - {"sup1", "\302\271"}, - {"sup2", "\302\262"}, - {"sup3", "\302\263"}, - {"szlig", "\303\237"}, - {"thinsp", "\342\200\211"}, - {"THORN", "\303\236"}, - {"thorn", "\303\276"}, - {"tilde", "\313\234"}, - {"times", "\303\227"}, - {"trade", "™" }, - {"Uacute", "\303\232"}, - {"uacute", "\303\272"}, - {"Ucirc", "\303\233"}, - {"ucirc", "\303\273"}, - {"Ugrave", "\303\231"}, - {"ugrave", "\303\271"}, - {"uml", "\302\250"}, - {"Uuml", "\303\234"}, - {"uuml", "\303\274"}, - {"Yacute", "\303\235"}, - {"yacute", "\303\275"}, - {"yen", "\302\245"}, - {"yuml", "\303\277"}, - {"Yuml", "\305\270"}, + /* A */ + {"Aacute", "Á"}, + {"aacute", "á"}, + {"Acirc", "Â"}, + {"acirc", 
"â"}, + {"acute", "´"}, + {"AElig", "Æ"}, + {"aelig", "æ"}, + {"Agrave", "À"}, + {"agrave", "à"}, + {"alefsym", "ℵ"}, + {"Alpha", "Α"}, + {"alpha", "α"}, + {"amp", "&"}, + {"and", "∧"}, + {"ang", "∠"}, + {"apos", "'"}, + {"Aring", "Å"}, + {"aring", "å"}, + {"asymp", "≈"}, + {"Atilde", "Ã"}, + {"atilde", "ã"}, + {"Auml", "Ä"}, + {"auml", "ä"}, + /* B */ + {"bdquo", "„"}, + {"Beta", "Β"}, + {"beta", "β"}, + {"brvbar", "¦"}, + {"bull", "•"}, + /* C */ + {"cap", "∩"}, + {"Ccedil", "Ç"}, + {"ccedil", "ç"}, + {"cedil", "¸"}, + {"cent", "¢"}, + {"Chi", "Χ"}, + {"chi", "χ"}, + {"circ", "ˆ"}, + {"clubs", "♣"}, + {"cong", "≅"}, + {"copy", "©"}, + {"crarr", "↵"}, + {"cup", "∪"}, + {"curren", "¤"}, + /* D */ + {"dagger", "†"}, + {"Dagger", "‡"}, + {"dArr", "⇓"}, + {"darr", "↓"}, + {"deg", "°"}, + {"Delta", "Δ"}, + {"delta", "δ"}, + {"diams", "♦"}, + {"divide", "÷"}, + /* E */ + {"Eacute", "É"}, + {"eacute", "é"}, + {"Ecirc", "Ê"}, + {"ecirc", "ê"}, + {"Egrave", "È"}, + {"egrave", "è"}, + {"empty", "∅"}, + {"emsp", "\xE2\x80\x83"}, + {"ensp", "\xE2\x80\x82"}, + {"Epsilon", "Ε"}, + {"epsilon", "ε"}, + {"equiv", "≡"}, + {"Eta", "Η"}, + {"eta", "η"}, + {"ETH", "Ð"}, + {"eth", "ð"}, + {"Euml", "Ë"}, + {"euml", "ë"}, + {"euro", "€"}, + {"exist", "∃"}, + /* F */ + {"fnof", "ƒ"}, + {"forall", "∀"}, + {"frac12", "½"}, + {"frac14", "¼"}, + {"frac34", "¾"}, + {"frasl", "⁄"}, + /* G */ + {"Gamma", "Γ"}, + {"gamma", "γ"}, + {"ge", "≥"}, + {"gt", ">"}, + /* H */ + {"hArr", "⇔"}, + {"harr", "↔"}, + {"hearts", "♥"}, + {"hellip", "…"}, + /* I */ + {"Iacute", "Í"}, + {"iacute", "í"}, + {"Icirc", "Î"}, + {"icirc", "î"}, + {"iexcl", "¡"}, + {"Igrave", "Ì"}, + {"igrave", "ì"}, + {"image", "ℑ"}, + {"infin", "∞"}, + {"int", "∫"}, + {"Iota", "Ι"}, + {"iota", "ι"}, + {"iquest", "¿"}, + {"isin", "∈"}, + {"Iuml", "Ï"}, + {"iuml", "ï"}, + /* K */ + {"Kappa", "Κ"}, + {"kappa", "κ"}, + /* L */ + {"Lambda", "Λ"}, + {"lambda", "λ"}, + {"lang", "〈"}, + {"laquo", "«"}, + {"lArr", "⇐"}, + {"larr", "←"}, + 
{"lceil", "⌈"}, + {"ldquo", "“"}, + {"le", "≤"}, + {"lfloor", "⌊"}, + {"lowast", "∗"}, + {"loz", "◊"}, + {"lrm", "\xE2\x80\x8E"}, + {"lsaquo", "‹"}, + {"lsquo", "‘"}, + {"lt", "<"}, + /* M */ + {"macr", "¯"}, + {"mdash", "—"}, + {"micro", "µ"}, + {"middot", "·"}, + {"minus", "−"}, + {"Mu", "Μ"}, + {"mu", "μ"}, + /* N */ + {"nabla", "∇"}, + {"nbsp", "\xC2\xA0"}, + {"ndash", "–"}, + {"ne", "≠"}, + {"ni", "∋"}, + {"not", "¬"}, + {"notin", "∉"}, + {"nsub", "⊄"}, + {"Ntilde", "Ñ"}, + {"ntilde", "ñ"}, + {"Nu", "Ν"}, + {"nu", "ν"}, + /* O */ + {"Oacute", "Ó"}, + {"oacute", "ó"}, + {"Ocirc", "Ô"}, + {"ocirc", "ô"}, + {"OElig", "Œ"}, + {"oelig", "œ"}, + {"Ograve", "Ò"}, + {"ograve", "ò"}, + {"oline", "‾"}, + {"Omega", "Ω"}, + {"omega", "ω"}, + {"Omicron", "Ο"}, + {"omicron", "ο"}, + {"oplus", "⊕"}, + {"or", "∨"}, + {"ordf", "ª"}, + {"ordm", "º"}, + {"Oslash", "Ø"}, + {"oslash", "ø"}, + {"Otilde", "Õ"}, + {"otilde", "õ"}, + {"otimes", "⊗"}, + {"Ouml", "Ö"}, + {"ouml", "ö"}, + /* P */ + {"para", "¶"}, + {"part", "∂"}, + {"permil", "‰"}, + {"perp", "⊥"}, + {"Phi", "Φ"}, + {"phi", "φ"}, + {"Pi", "Π"}, + {"pi", "π"}, + {"piv", "ϖ"}, + {"plusmn", "±"}, + {"pound", "£"}, + {"Prime", "″"}, + {"prime", "′"}, + {"prod", "∏"}, + {"prop", "∝"}, + {"Psi", "Ψ"}, + {"psi", "ψ"}, + /* Q */ + {"quot", "\""}, + /* R */ + {"radic", "√"}, + {"rang", "〉"}, + {"raquo", "»"}, + {"rArr", "⇒"}, + {"rarr", "→"}, + {"rceil", "⌉"}, + {"rdquo", "”"}, + {"real", "ℜ"}, + {"reg", "®"}, + {"rfloor", "⌋"}, + {"Rho", "Ρ"}, + {"rho", "ρ"}, + {"rlm", "\xE2\x80\x8F"}, + {"rsaquo", "›"}, + {"rsquo", "’"}, + /* S */ + {"sbquo", "‚"}, + {"Scaron", "Š"}, + {"scaron", "š"}, + {"sdot", "⋅"}, + {"sect", "§"}, + {"shy", "\xC2\xAD"}, + {"Sigma", "Σ"}, + {"sigma", "σ"}, + {"sigmaf", "ς"}, + {"sim", "∼"}, + {"spades", "♠"}, + {"sub", "⊂"}, + {"sube", "⊆"}, + {"sum", "∑"}, + {"sup", "⊃"}, + {"sup1", "¹"}, + {"sup2", "²"}, + {"sup3", "³"}, + {"supe", "⊇"}, + {"szlig", "ß"}, + /* T */ + {"Tau", "Τ"}, + {"tau", "τ"}, + 
{"there4", "∴"}, + {"Theta", "Θ"}, + {"theta", "θ"}, + {"thetasym", "ϑ"}, + {"thinsp", "\xE2\x80\x89"}, + {"THORN", "Þ"}, + {"thorn", "þ"}, + {"tilde", "˜"}, + {"times", "×"}, + {"trade", "™"}, + /* U */ + {"Uacute", "Ú"}, + {"uacute", "ú"}, + {"uArr", "⇑"}, + {"uarr", "↑"}, + {"Ucirc", "Û"}, + {"ucirc", "û"}, + {"Ugrave", "Ù"}, + {"ugrave", "ù"}, + {"uml", "¨"}, + {"upsih", "ϒ"}, + {"Upsilon", "Υ"}, + {"upsilon", "υ"}, + {"Uuml", "Ü"}, + {"uuml", "ü"}, + /* W */ + {"weierp", "℘"}, + /* X */ + {"Xi", "Ξ"}, + {"xi", "ξ"}, + /* Y */ + {"Yacute", "Ý"}, + {"yacute", "ý"}, + {"yen", "¥"}, + {"Yuml", "Ÿ"}, + {"yuml", "ÿ"}, + /* Z */ + {"Zeta", "Ζ"}, + {"zeta", "ζ"}, + {"zwj", "\xE2\x80\x8D"}, + {"zwnj", "\xE2\x80\x8C"}, {NULL, NULL} }; -- 2.25.1