};
static SC_HTMLSymbol symbol_list[] = {
- {"<" , "<"},
- {">" , ">"},
- {"&" , "&"},
- {""" , "\""},
- {"‘", "'"},
- {"’", "'"},
- {"“", "\""},
- {"”", "\""},
- {" " , " "},
- {"™" , "(TM)"},
- {"…", "..."},
- {"•", "*"},
- {"–", "-"},
- {"—", "--"},
- {"€", "EUR"},
- {"¢", "c"},
- {"£", "£"},
+ {"<", "\74"},
+ {">", "\76"},
+ {"&", "\46"},
+ {""", "\42"},
+ {"‘", "\47"},
+ {"’", "\47"},
+ {"“", "\42"},
+ {"”", "\42"},
+ {"«", "\302\253"},
+ {"»", "\302\273"},
+ {" ", "\40"},
+ {"™", "\50\124\115\51"},
+ {"…", "\56\56\56"},
+ {"•", "\52"},
+ {"–", "\55"},
+ {"—", "\55\55"},
+ {"€", "\105\125\122"},
+ {"¢", "\302\242"},
+ {"£", "\302\243"},
+ {"¤", "\302\244"},
+ {"¥", "\302\245"},
+ {"©", "\302\251"},
+ {"®", "\302\256"},
+ {"¿", "\302\277"},
+ {"¡", "\302\241"}
};
static SC_HTMLSymbol ascii_symbol_list[] = {
{"ù", "\303\271"},
{"ú", "\303\272"},
{"û" , "\303\273"},
- {"ý", "\303\275"},
+ {"ý", "\303\275"}
+};
+
+/*
+ * One row of the integer-keyed symbol table: a character code and the
+ * replacement string stored for it.
+ */
+typedef struct _SC_HTMLAltSymbol
+{
+ gint key; /* character code; its address is used as the hash key */
+ gchar *const val; /* replacement text, or NULL when none is defined */
+} SC_HTMLAltSymbol;
+
+/*
+ * Replacement text keyed by decimal character code; the rows are loaded
+ * into alternate_symbol_table.  Presumably the code comes from a numeric
+ * reference such as &#8364; -- confirm against the lookup site.
+ *
+ * Values are spelled with octal escapes.  A NULL value makes the lookup
+ * return NULL, exactly as if the code were absent from the table.
+ *
+ * http://www.w3schools.com/html/html_entitiesref.asp
+ */
+static SC_HTMLAltSymbol alternate_symbol_list[] = {
+ { 96, "\140"}, /* backtick */
+ { 153, "\50\124\115\51"}, /* trademark, rendered as (TM) */
+ { 161, "\302\241"}, /* inverted exclamation mark &iexcl */
+ { 162, "\302\242"}, /* cent (currency) &cent */
+ { 163, "\302\243"}, /* pound (currency) &pound */
+ { 164, "\302\244"}, /* currency sign &curren (U+00A4, not the euro sign) */
+ { 165, "\302\245"}, /* yen (currency) &yen */
+ { 169, "\302\251"}, /* copyright sign &copy */
+ { 174, "\302\256"}, /* registered sign &reg */
+ { 191, "\302\277"}, /* inverted question mark &iquest */
+ { 338, "\117\105"}, /* capital ligature OE &OElig */
+ { 339, "\157\145"}, /* small ligature OE &oelig */
+ { 352, NULL}, /* capital S w/caron &Scaron */
+ { 353, NULL}, /* small S w/caron &scaron */
+ { 376, NULL}, /* cap Y w/ diaeres &Yuml */
+ { 710, "\136"}, /* circumflex accent &circ */
+ { 732, "\176"}, /* small tilde &tilde */
+ {8194, "\40"}, /* en space &ensp */
+ {8195, "\40"}, /* em space &emsp */
+ {8201, "\40"}, /* thin space &thinsp */
+ {8204, NULL}, /* zero width non-joiner &zwnj */
+ {8205, NULL}, /* zero width joiner &zwj */
+ {8206, NULL}, /* l-t-r mark &lrm */
+ {8207, NULL}, /* r-t-l mark &rlm */
+ {8211, "\55"}, /* en dash &ndash */
+ {8212, "\55\55"}, /* em dash &mdash */
+ {8216, "\47"}, /* l single quot mark &lsquo */
+ {8217, "\47"}, /* r single quot mark &rsquo */
+ {8218, "\54"}, /* single low-9 quot &sbquo */
+ {8220, "\42"}, /* l double quot mark &ldquo (\42 is '"'; \134 would be a backslash) */
+ {8221, "\42"}, /* r double quot mark &rdquo */
+ {8222, "\42"}, /* double low-9 quot &bdquo */
+ {8224, NULL}, /* dagger &dagger */
+ {8225, NULL}, /* double dagger &Dagger */
+ {8226, "\52"}, /* bullet &bull */
+ {8230, "\56\56\56"}, /* horizontal ellipsis &hellip */
+ {8240, "\45\157"}, /* per mille &permil, rendered as %o */
+ {8249, "\74"}, /* l-pointing angle quot &lsaquo */
+ {8250, "\76"}, /* r-pointing angle quot &rsaquo */
+ {8364, "\105\125\122"}, /* euro &euro, rendered as EUR */
+ {8482, "\50\124\115\51"} /* trademark &trade, rendered as (TM) */
};
static GHashTable *default_symbol_table;
+static GHashTable *alternate_symbol_table; /* integer-keyed table (g_int_hash/g_int_equal), created on first use and filled from alternate_symbol_list */
static SC_HTMLState sc_html_read_line (SC_HTMLParser *parser);
static void sc_html_append_char (SC_HTMLParser *parser,
for (i = 0; i < sizeof(list) / sizeof(list[0]); i++) \
g_hash_table_insert(table, list[i].key, list[i].val); \
}
+#define SYMBOL_TABLE_REF_ADD(table, list) \
+{ \
+ gint i; \
+ \
+ for (i = 0; i < sizeof(list) / sizeof(list[0]); i++) \
+ g_hash_table_insert(table, &list[i].key, list[i].val); \
+}
if (!default_symbol_table) {
default_symbol_table =
SYMBOL_TABLE_ADD(default_symbol_table, symbol_list);
SYMBOL_TABLE_ADD(default_symbol_table, ascii_symbol_list);
}
+ if (!alternate_symbol_table) {
+ alternate_symbol_table =
+ g_hash_table_new(g_int_hash, g_int_equal);
+ SYMBOL_TABLE_REF_ADD(alternate_symbol_table, alternate_symbol_list);
+ }
#undef SYMBOL_TABLE_ADD
+#undef SYMBOL_TABLE_REF_ADD
parser->symbol_table = default_symbol_table;
+ parser->alt_symbol_table = alternate_symbol_table;
return parser;
}
parser->state = SC_HTML_NORMAL;
return;
} else {
- char *symb = NULL;
- switch (ch) {
- /* http://www.w3schools.com/html/html_entitiesref.asp */
- case 96: /* backtick */
- symb = "`";
- break;
- case 153: /* trademark */
- symb = "(TM)";
- break;
- case 162: /* cent (currency) ¢ */
- symb = "c";
- break;
- case 163: /* pound (currency) £ */
- symb = "£";
- break;
- case 338: /* capital ligature OE Œ */
- symb = "OE";
- break;
- case 339: /* small ligature OE œ */
- symb = "oe";
- break;
- case 352: /* capital S w/caron Š */
- case 353: /* small S w/caron š */
- case 376: /* cap Y w/ diaeres Ÿ */
- break;
- case 710: /* circumflex accent ˆ */
- symb = "^";
- break;
- case 732: /* small tilde ˜ */
- symb = "~";
- break;
- case 8194: /* en space   */
- case 8195: /* em space   */
- case 8201: /* thin space   */
- symb = " ";
- break;
- case 8204: /* zero width non-joiner ‌ */
- case 8205: /* zero width joiner ‍ */
- case 8206: /* l-t-r mark ‎ */
- case 8207: /* r-t-l mark &rlm */
- break;
- case 8211: /* en dash – */
- symb = "-";
- break;
- case 8212: /* em dash — */
- symb = "--";
- break;
- case 8216: /* l single quot mark ‘ */
- case 8217: /* r single quot mark ’ */
- symb = "'";
- break;
- case 8218: /* single low-9 quot ‚ */
- symb = ",";
- break;
- case 8220: /* l double quot mark “ */
- case 8221: /* r double quot mark ” */
- symb = "\"";
- break;
- case 8222: /* double low-9 quot „ */
- symb = ",,";
- break;
- case 8224: /* dagger † */
- case 8225: /* double dagger ‡ */
- break;
- case 8226: /* bullet • */
- symb = "*";
- break;
- case 8230: /* horizontal ellipsis … */
- symb = "...";
- break;
- case 8240: /* per mile ‰ */
- symb = "\%o";
- break;
- case 8249: /* l-pointing angle quot ‹ */
- symb = "<";
- break;
- case 8250: /* r-pointing angle quot › */
- symb = ">";
- break;
- case 8364: /* euro € */
- symb = "EUR";
- break;
- case 8482: /* trademark ™ */
- symb = "(TM)";
- break;
- default:
- break;
- }
+ const gchar *symb = g_hash_table_lookup(parser->alt_symbol_table, &ch);
if (symb) {
sc_html_append_str(parser, symb, -1);
parser->state = SC_HTML_NORMAL;