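Fix URL parsing: accept single-quoted href values, guard against a missing URL, and skip any <style>/<script> block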
[claws.git] / src / html.c
index b02ed9ffb775d5d391cf2c7b4be326d355912163..fddb6533849804ead8400af54d9fc7c19e15002f 100644 (file)
@@ -550,8 +550,12 @@ static HTMLState html_parse_tag(HTMLParser *parser)
                        /* look for href */
                        if (!strcmp(href_token, "href")) {
                                /* the next token is the url, between double
-                                * quotes */
+                                        * quotes */
                                char* url = strtok(NULL, "\"");
+                                       if (url && url[0] == '\'')
+                                         url = strtok(url,"\'");
+
+                               if (!url) break;
                                html_append_str(parser, url, strlen(url));
                                html_append_char(parser, ' ');
                                /* start enforcing html link */
@@ -674,7 +678,7 @@ static void html_get_parenthesis(HTMLParser *parser, gchar *buf, gint len)
        buf[0] = '\0';
        g_return_if_fail(*parser->bufp == '<');
 
-       /* ignore comments */
+       /* ignore comment / CSS / script stuff */
        if (!strncmp(parser->bufp, "<!--", 4)) {
                parser->bufp += 4;
                while ((p = strstr(parser->bufp, "-->")) == NULL)
@@ -682,38 +686,18 @@ static void html_get_parenthesis(HTMLParser *parser, gchar *buf, gint len)
                parser->bufp = p + 3;
                return;
        }
-       /* because html is not strict regarding case and double-quoting of
-          tags we have to check for both */
-       /* ignore css stuff */
-       if (!g_strncasecmp(parser->bufp, "<STYLE type=text/css>", 21)) {
-               parser->bufp += 21;
-               while ((p = strcasestr(parser->bufp, "</STYLE>")) == NULL)
-                       if (html_read_line(parser) == HTML_EOF) return;
-               parser->bufp = p + 8;
-               return;
-       }
-       /* ignore css stuff with double quotes*/
-       if (!g_strncasecmp(parser->bufp, "<STYLE type=\"text/css\">", 23)) {
-               parser->bufp += 23;
-               while ((p = strcasestr(parser->bufp, "</STYLE>")) == NULL)
+       if (!g_strncasecmp(parser->bufp, "<style", 6)) {
+               parser->bufp += 6;
+               while ((p = strcasestr(parser->bufp, "</style>")) == NULL)
                        if (html_read_line(parser) == HTML_EOF) return;
                parser->bufp = p + 8;
                return;
        }
-       /* ignore javascipt stuff */
-       if (!g_strncasecmp(parser->bufp, "<SCRIPT language=javascript>", 28)) {
-               parser->bufp += 28;
-               while ((p = strcasestr(parser->bufp, "</SCRIPT>")) == NULL)
+       if (!g_strncasecmp(parser->bufp, "<script", 7)) {
+               parser->bufp += 7;
+               while ((p = strcasestr(parser->bufp, "</script>")) == NULL)
                        if (html_read_line(parser) == HTML_EOF) return;
-               parser->bufp = p + 8;
-               return;
-       }
-       /* ignore javascipt stuff with double-quotes */
-       if (!g_strncasecmp(parser->bufp, "<SCRIPT language=\"javascript\">", 30)) {
-               parser->bufp += 30;
-               while ((p = strcasestr(parser->bufp, "</SCRIPT>")) == NULL)
-                       if (html_read_line(parser) == HTML_EOF) return;
-               parser->bufp = p + 8;
+               parser->bufp = p + 9;
                return;
        }