From ff4d8436b565d85eba59a06f982358ea5d5a8e24 Mon Sep 17 00:00:00 2001 From: Colin Leroy Date: Wed, 8 Aug 2007 16:01:54 +0000 Subject: [PATCH] 2007-08-08 [colin] 2.10.0cvs100 * src/html.c Complete symbol table, thanks to wwp. Fixes bug 1284, 'The html -> text converter ignores entities' * src/msgcache.c Fix possible fd leak --- ChangeLog | 9 ++ PATCHSETS | 1 + configure.ac | 2 +- src/html.c | 420 ++++++++++++++++++++++++++++++------------------- src/msgcache.c | 4 +- 5 files changed, 274 insertions(+), 162 deletions(-) diff --git a/ChangeLog b/ChangeLog index 5216910b0..e03ca7275 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +2007-08-08 [colin] 2.10.0cvs100 + + * src/html.c + Complete symbol table, thanks to wwp. + Fixes bug 1284, 'The html -> text + converter ignores entities' + * src/msgcache.c + Fix possible fd leak + 2007-08-08 [colin] 2.10.0cvs99 * src/imap.c diff --git a/PATCHSETS b/PATCHSETS index 9e6a6836a..932c662b1 100644 --- a/PATCHSETS +++ b/PATCHSETS @@ -2753,3 +2753,4 @@ ( cvs diff -u -r 1.1.2.9 -r 1.1.2.10 manual/account.xml; cvs diff -u -r 1.1.2.39 -r 1.1.2.40 manual/advanced.xml; cvs diff -u -r 1.1.2.8 -r 1.1.2.9 manual/fr/account.xml; cvs diff -u -r 1.1.2.15 -r 1.1.2.16 manual/fr/advanced.xml; ) > 2.10.0cvs97.patchset ( cvs diff -u -r 1.179.2.177 -r 1.179.2.178 src/imap.c; cvs diff -u -r 1.94.2.143 -r 1.94.2.144 src/messageview.c; cvs diff -u -r 1.16.2.53 -r 1.16.2.54 src/msgcache.c; ) > 2.10.0cvs98.patchset ( cvs diff -u -r 1.179.2.178 -r 1.179.2.179 src/imap.c; ) > 2.10.0cvs99.patchset +( cvs diff -u -r 1.12.2.21 -r 1.12.2.22 src/html.c; cvs diff -u -r 1.16.2.54 -r 1.16.2.55 src/msgcache.c; ) > 2.10.0cvs100.patchset diff --git a/configure.ac b/configure.ac index 0076fdca0..a7bdb2f6b 100644 --- a/configure.ac +++ b/configure.ac @@ -11,7 +11,7 @@ MINOR_VERSION=10 MICRO_VERSION=0 INTERFACE_AGE=0 BINARY_AGE=0 -EXTRA_VERSION=99 +EXTRA_VERSION=100 EXTRA_RELEASE= EXTRA_GTK2_VERSION= diff --git a/src/html.c b/src/html.c index 49a6e25ce..e124c6b3d 100644 --- a/src/html.c +++ b/src/html.c @@ -38,94 +38,265 @@ struct _SC_HTMLSymbol }; static SC_HTMLSymbol symbol_list[] = { - {"<", "\74"}, - {">", "\76"}, - {"&", "\46"}, - {""", "\42"}, - {"‘", "\47"}, - {"’", "\47"}, - {"“", "\42"}, - {"”", "\42"}, - {"«", "\302\253"}, - {"»", "\302\273"}, - {" ", "\40"}, - {"™", "\50\124\115\51"}, - {"…", "\56\56\56"}, - {"•", "\52"}, - {"–", "\55"}, - {"—", "\55\55"}, - {"€", "\105\125\122"}, - {"¢", "\302\242"}, - {"£", "\302\243"}, - {"¤", "\302\244"}, - {"¥", "\302\245"}, - {"©", "\302\251"}, - {"®", "\302\256"}, - {"¿", "\302\277"}, - {"¡", "\302\241"} -}; - -static SC_HTMLSymbol ascii_symbol_list[] = { - {"¡" , "\302\241"}, - {"¦", "\302\246"}, - {"©" , "\302\251"}, - {"«" , "\302\253"}, - {"®" , "\302\256"}, - - {"²" , "\302\262"}, - {"³" , "\302\263"}, - {"´" , "\302\264"}, - {"¸" , "\302\270"}, - {"¹" , "\302\271"}, - {"»" , "\302\273"}, - {"¼", "\302\274"}, - {"½", "\302\275"}, - {"¾", "\302\276"}, - {"¿", "\302\277"}, - - {"À", "\303\200"}, - {"Á", "\303\201"}, - {"Â" , "\303\202"}, - {"Ã", "\303\203"}, - {"Æ" , "\303\206"}, - {"È", "\303\210"}, - {"É", "\303\211"}, - {"Ê" , "\303\212"}, - {"Ì", "\303\214"}, - {"Í", "\303\215"}, - {"Î" , "\303\216"}, - - {"Ñ", "\303\221"}, - {"Ò", "\303\222"}, - {"Ó", "\303\223"}, - {"Ô" , "\303\224"}, - {"Õ", "\303\225"}, - {"Ù", "\303\231"}, - {"Ú", "\303\232"}, - {"Û" , "\303\233"}, - {"Ý", "\303\235"}, - - {"à", "\303\240"}, - {"á", "\303\241"}, - {"â" , "\303\242"}, - {"ã", "\303\243"}, - {"æ" , "\303\246"}, - {"è", "\303\250"}, - {"é", "\303\251"}, - {"ê" , "\303\252"}, - {"ì", "\303\254"}, - {"í", "\303\255"}, - {"î" , "\303\256"}, - - {"ñ", "\303\261"}, - {"ò", "\303\262"}, - {"ó", "\303\263"}, - {"ô" , "\303\264"}, - {"õ", "\303\265"}, - {"ù", "\303\271"}, - {"ú", "\303\272"}, - {"û" , "\303\273"}, - {"ý", "\303\275"} + {""", "\42"}, + {"&", "\46"}, + {"'", "\47"}, + {"<", "\74"}, + {">", "\76"}, + {"’", "\47"}, + {"™", "\342\204\242"}, + {" ", "\40"}, + {"¡", "\302\241"}, + {"¢", "\302\242"}, + {"£", "\302\243"}, + {"¤", "\302\244"}, + {"¥", "\302\245"}, + {"¦", "\302\246"}, + {"§", "\302\247"}, + {"¨", "\302\250"}, + {"©", "\302\251"}, + {"ª", "\302\252"}, + {"«", "\302\253"}, + {"¬", "\302\254"}, + {"­", "\302\255"}, + {"®", "\302\256"}, + {"¯", "\302\257"}, + {"°", "\302\260"}, + {"±", "\302\261"}, + {"²", "\302\262"}, + {"³", "\302\263"}, + {"´", "\302\264"}, + {"µ", "\302\265"}, + {"¶", "\302\266"}, + {"·", "\302\267"}, + {"¸", "\302\270"}, + {"¹", "\302\271"}, + {"º", "\302\272"}, + {"»", "\302\273"}, + {"¼", "\302\274"}, + {"½", "\302\275"}, + {"¾", "\302\276"}, + {"¿", "\302\277"}, + {"À", "\303\200"}, + {"Á", "\303\201"}, + {"Â", "\303\202"}, + {"Ã", "\303\203"}, + {"Ä", "\303\204"}, + {"Å", "\303\205"}, + {"Æ", "\303\206"}, + {"Ç", "\303\207"}, + {"È", "\303\210"}, + {"É", "\303\211"}, + {"Ê", "\303\212"}, + {"Ë", "\303\213"}, + {"Ì", "\303\214"}, + {"Í", "\303\215"}, + {"Î", "\303\216"}, + {"Ï", "\303\217"}, + {"Ð", "\303\220"}, + {"Ñ", "\303\221"}, + {"Ò", "\303\222"}, + {"Ó", "\303\223"}, + {"Ô", "\303\224"}, + {"Õ", "\303\225"}, + {"Ö", "\303\226"}, + {"×", "\303\227"}, + {"Ø", "\303\230"}, + {"Ù", "\303\231"}, + {"Ú", "\303\232"}, + {"Û", "\303\233"}, + {"Ü", "\303\234"}, + {"Ý", "\303\235"}, + {"Þ", "\303\236"}, + {"ß", "\303\237"}, + {"à", "\303\240"}, + {"á", "\303\241"}, + {"â", "\303\242"}, + {"ã", "\303\243"}, + {"ä", "\303\244"}, + {"å", "\303\245"}, + {"æ", "\303\246"}, + {"ç", "\303\247"}, + {"è", "\303\250"}, + {"é", "\303\251"}, + {"ê", "\303\252"}, + {"ë", "\303\253"}, + {"ì", "\303\254"}, + {"í", "\303\255"}, + {"î", "\303\256"}, + {"ï", "\303\257"}, + {"ð", "\303\260"}, + {"ñ", "\303\261"}, + {"ò", "\303\262"}, + {"ó", "\303\263"}, + {"ô", "\303\264"}, + {"õ", "\303\265"}, + {"ö", "\303\266"}, + {"÷", "\303\267"}, + {"ø", "\303\270"}, + {"ù", "\303\271"}, + {"ú", "\303\272"}, + {"û", "\303\273"}, + {"ü", "\303\274"}, + {"ý", "\303\275"}, + {"þ", "\303\276"}, + {"ÿ", "\303\277"}, + {"Œ", "\305\222"}, + {"œ", "\305\223"}, + {"Š", "\305\240"}, + {"š", "\305\241"}, + {"Ÿ", "\305\270"}, + {"ˆ", "\313\206"}, + {"˜", "\313\234"}, + {" ", "\342\200\202"}, + {" ", "\342\200\203"}, + {" ", "\342\200\211"}, + {"–", "\342\200\223"}, + {"—", "\342\200\224"}, + {"‘", "\342\200\230"}, + {"’", "\342\200\231"}, + {"‚", "\342\200\232"}, + {"“", "\342\200\234"}, + {"”", "\342\200\235"}, + {"„", "\342\200\236"}, + {"†", "\342\200\240"}, + {"‡", "\342\200\241"}, + {"•", "\342\200\242"}, + {"…", "\342\200\246"}, + {"‰", "\342\200\260"}, + {"‹", "\342\200\271"}, + {"›", "\342\200\272"}, + {"€", "\342\202\254"}, + {"™", "\342\204\242"}, + {""", "\42"}, + {"&", "\46"}, + {"'", "\47"}, + {"<", "\74"}, + {">", "\76"}, + {"&squot;", "\47"}, + {" ", "\40"}, + {"¡", "\302\241"}, + {"¢", "\302\242"}, + {"£", "\302\243"}, + {"¤", "\302\244"}, + {"¥", "\302\245"}, + {"¦", "\302\246"}, + {"§", "\302\247"}, + {"¨", "\302\250"}, + {"©", "\302\251"}, + {"ª", "\302\252"}, + {"«", "\302\253"}, + {"¬", "\302\254"}, + {"­", "\302\255"}, + {"®", "\302\256"}, + {"¯", "\302\257"}, + {"°", "\302\260"}, + {"±", "\302\261"}, + {"²", "\302\262"}, + {"³", "\302\263"}, + {"´", "\302\264"}, + {"µ", "\302\265"}, + {"¶", "\302\266"}, + {"·", "\302\267"}, + {"¸", "\302\270"}, + {"¹", "\302\271"}, + {"º", "\302\272"}, + {"»", "\302\273"}, + {"¼", "\302\274"}, + {"½", "\302\275"}, + {"¾", "\302\276"}, + {"¿", "\302\277"}, + {"À", "\303\200"}, + {"Á", "\303\201"}, + {"Â", "\303\202"}, + {"Ã", "\303\203"}, + {"Ä", "\303\204"}, + {"Å", "\303\205"}, + {"Æ", "\303\206"}, + {"Ç", "\303\207"}, + {"È", "\303\210"}, + {"É", "\303\211"}, + {"Ê", "\303\212"}, + {"Ë", "\303\213"}, + {"Ì", "\303\214"}, + {"Í", "\303\215"}, + {"Î", "\303\216"}, + {"Ï", "\303\217"}, + {"Ð", "\303\220"}, + {"Ñ", "\303\221"}, + {"Ò", "\303\222"}, + {"Ó", "\303\223"}, + {"Ô", "\303\224"}, + {"Õ", "\303\225"}, + {"Ö", "\303\226"}, + {"×", "\303\227"}, + {"Ø", "\303\230"}, + {"Ù", "\303\231"}, + {"Ú", "\303\232"}, + {"Û", "\303\233"}, + {"Ü", "\303\234"}, + {"Ý", "\303\235"}, + {"Þ", "\303\236"}, + {"ß", "\303\237"}, + {"à", "\303\240"}, + {"á", "\303\241"}, + {"â", "\303\242"}, + {"ã", "\303\243"}, + {"ä", "\303\244"}, + {"å", "\303\245"}, + {"æ", "\303\246"}, + {"ç", "\303\247"}, + {"è", "\303\250"}, + {"é", "\303\251"}, + {"ê", "\303\252"}, + {"ë", "\303\253"}, + {"ì", "\303\254"}, + {"í", "\303\255"}, + {"î", "\303\256"}, + {"ï", "\303\257"}, + {"ð", "\303\260"}, + {"ñ", "\303\261"}, + {"ò", "\303\262"}, + {"ó", "\303\263"}, + {"ô", "\303\264"}, + {"õ", "\303\265"}, + {"ö", "\303\266"}, + {"÷", "\303\267"}, + {"ø", "\303\270"}, + {"ù", "\303\271"}, + {"ú", "\303\272"}, + {"û", "\303\273"}, + {"ü", "\303\274"}, + {"ý", "\303\275"}, + {"þ", "\303\276"}, + {"ÿ", "\303\277"}, + {"Œ", "\305\222"}, + {"œ", "\305\223"}, + {"Š", "\305\240"}, + {"š", "\305\241"}, + {"Ÿ", "\305\270"}, + {"ˆ", "\313\206"}, + {"˜", "\313\234"}, + {" ", "\342\200\202"}, + {" ", "\342\200\203"}, + {" ", "\342\200\211"}, + {"–", "\342\200\223"}, + {"—", "\342\200\224"}, + {"‘", "\342\200\230"}, + {"’", "\342\200\231"}, + {"‚", "\342\200\232"}, + {"“", "\342\200\234"}, + {"”", "\342\200\235"}, + {"„", "\342\200\236"}, + {"†", "\342\200\240"}, + {"‡", "\342\200\241"}, + {"•", "\342\200\242"}, + {"…", "\342\200\246"}, + {"‰", "\342\200\260"}, + {"‹", "\342\200\271"}, + {"›", "\342\200\272"}, + {"€", "\342\202\254"}, + {"™", "\342\204\242"} }; typedef struct _SC_HTMLAltSymbol SC_HTMLAltSymbol; @@ -136,53 +307,7 @@ struct _SC_HTMLAltSymbol gchar *const val; }; -/* http://www.w3schools.com/html/html_entitiesref.asp */ -static SC_HTMLAltSymbol alternate_symbol_list[] = { - { 96, "\140"}, /* backtick */ - { 153, "\50\124\115\51"}, /* trademark */ - { 161, "\302\241"}, /* inverted exclamation mark ¡ */ - { 162, "\302\242"}, /* cent (currency) ¢ */ - { 163, "\302\243"}, /* pound (currency) £ */ - { 164, "\342\202\254"}, /* currency sign ¤ */ - { 165, "\302\245"}, /* yen (currency) ¥ */ - { 169, "\302\251"}, /* copyright sign © */ - { 174, "\302\256"}, /* registered sign ® */ - { 191, "\302\277"}, /* inverted question mark ¿ */ - { 338, "\117\105"}, /* capital ligature OE &OElig */ - { 339, "\157\145"}, /* small ligature OE &oelig */ - { 352, NULL}, /* capital S w/caron &Scaron */ - { 353, NULL}, /* small S w/caron &scaron */ - { 376, NULL}, /* cap Y w/ diaeres &Yuml */ - { 710, "\136"}, /* circumflex accent &circ */ - { 732, "\176"}, /* small tilde &tilde */ - {8194, "\40"}, /* en space &ensp */ - {8195, "\40"}, /* em space &emsp */ - {8201, "\40"}, /* thin space &thinsp */ - {8204, NULL}, /* zero width non-joiner &zwnj */ - {8205, NULL}, /* zero width joiner &zwj */ - {8206, NULL}, /* l-t-r mark &lrm */ - {8207, NULL}, /* r-t-l mark &rlm */ - {8211, "\55"}, /* en dash &ndash */ - {8212, "\55\55"}, /* em dash &mdash */ - {8216, "\47"}, /* l single quot mark &lsquo */ - {8217, "\47"}, /* r single quot mark &rsquo */ - {8218, "\54"}, /* single low-9 quot &sbquo */ - {8220, "\134"}, /* l double quot mark &ldquo */ - {8221, "\134"}, /* r double quot mark &rdquo */ - {8222, "\42"}, /* double low-9 quot &bdquo */ - {8224, NULL}, /* dagger &dagger */ - {8225, NULL}, /* double dagger &Dagger */ - {8226, "\52"}, /* bullet &bull */ - {8230, "\56\56\56"}, /* horizontal ellipsis &hellip */ - {8240, "\45\157"}, /* per mile &permil */ - {8249, "\74"}, /* l-pointing angle quot &lsaquo */ - {8250, "\76"}, /* r-pointing angle quot &rsaquo */ - {8364, "\105\125\122"}, /* euro &euro */ - {8482, "\50\124\115\51"} /* trademark &trade */ -}; - static GHashTable *default_symbol_table; -static GHashTable *alternate_symbol_table; static SC_HTMLState sc_html_read_line (SC_HTMLParser *parser); static void sc_html_append_char (SC_HTMLParser *parser, @@ -236,19 +361,12 @@ SC_HTMLParser *sc_html_parser_new(FILE *fp, CodeConverter *conv) default_symbol_table = g_hash_table_new(g_str_hash, g_str_equal); SYMBOL_TABLE_ADD(default_symbol_table, symbol_list); - SYMBOL_TABLE_ADD(default_symbol_table, ascii_symbol_list); - } - if (!alternate_symbol_table) { - alternate_symbol_table = - g_hash_table_new(g_int_hash, g_int_equal); - SYMBOL_TABLE_REF_ADD(alternate_symbol_table, alternate_symbol_list); } #undef SYMBOL_TABLE_ADD #undef SYMBOL_TABLE_REF_ADD parser->symbol_table = default_symbol_table; - parser->alt_symbol_table = alternate_symbol_table; return parser; } @@ -598,25 +716,7 @@ static void sc_html_parse_special(SC_HTMLParser *parser) sc_html_append_str(parser, val, -1); parser->state = SC_HTML_NORMAL; return; - } else if (symbol_name[1] == '#' && g_ascii_isdigit(symbol_name[2])) { - gint ch; - - ch = atoi(symbol_name + 2); - if ((ch > 0 && ch <= 127) || - (ch >= 128 && ch <= 255 && - parser->conv->charset == C_ISO_8859_1)) { - sc_html_append_char(parser, ch); - parser->state = SC_HTML_NORMAL; - return; - } else { - const gchar *symb = g_hash_table_lookup(parser->alt_symbol_table, &ch); - if (symb) { - sc_html_append_str(parser, symb, -1); - parser->state = SC_HTML_NORMAL; - return; - } - } - } + } sc_html_append_str(parser, symbol_name, -1); } diff --git a/src/msgcache.c b/src/msgcache.c index 4ab562ce6..b35346deb 100644 --- a/src/msgcache.c +++ b/src/msgcache.c @@ -596,8 +596,10 @@ MsgCache *msgcache_read_cache(FolderItem *item, const gchar *cache_file) tmp_flags |= MSG_DRAFT; } - if (msgcache_read_cache_data_str(fp, &srccharset, NULL) < 0) + if (msgcache_read_cache_data_str(fp, &srccharset, NULL) < 0) { + fclose(fp); return NULL; + } dstcharset = CS_UTF_8; if (srccharset == NULL || dstcharset == NULL) { conv = NULL; -- 2.25.1