fix url parsing
[claws.git] / src / html.c
index 9124c1be0ec90ef292937071757c9c9a67a009bf..fddb6533849804ead8400af54d9fc7c19e15002f 100644 (file)
@@ -522,6 +522,7 @@ static HTMLState html_parse_tag(HTMLParser *parser)
 {
        gchar buf[HTMLBUFSIZE];
        gchar *p;
+       static gboolean is_in_href = FALSE;
 
        html_get_parenthesis(parser, buf, sizeof(buf));
 
@@ -541,6 +542,34 @@ static HTMLState html_parse_tag(HTMLParser *parser)
                parser->space = FALSE;
                html_append_char(parser, '\n');
                parser->state = HTML_BR;
+       } else if (!strcmp(buf, "a")) {
+               /* look for tokens separated by space or = */
+               char* href_token = strtok(++p, " =");
+               parser->state = HTML_NORMAL;
+               while (href_token != NULL) {
+                       /* look for href */
+                       if (!strcmp(href_token, "href")) {
+                               /* the next token is the url, between double
+                                        * or single quotes */
+                               char* url = strtok(NULL, "\"");
+                                       if (url && url[0] == '\'')
+                                         url = strtok(url,"\'");
+
+                               if (!url) break;
+                               html_append_str(parser, url, strlen(url));
+                               html_append_char(parser, ' ');
+                               /* start enforcing html link */
+                               parser->state = HTML_HREF;
+                               is_in_href = TRUE;
+                               break;
+                       }
+                       /* or get next token */
+                       href_token = strtok(NULL, " =");
+               }
+       } else if (!strcmp(buf, "/a")) {
+               /* stop enforcing html link */
+               parser->state = HTML_NORMAL;
+               is_in_href = FALSE;
        } else if (!strcmp(buf, "p")) {
                parser->space = FALSE;
                if (!parser->empty_line) {
@@ -590,6 +619,12 @@ static HTMLState html_parse_tag(HTMLParser *parser)
                }
                parser->state = HTML_NORMAL;
        }
+       
+       if (is_in_href == TRUE) {
+               /* when inside a link, everything will be written as
+                * clickable (see textview_show_html in textview.c) */
+               parser->state = HTML_HREF;
+       }
 
        return parser->state;
 }
@@ -643,7 +678,7 @@ static void html_get_parenthesis(HTMLParser *parser, gchar *buf, gint len)
        buf[0] = '\0';
        g_return_if_fail(*parser->bufp == '<');
 
-       /* ignore comments */
+       /* ignore comment / CSS / script stuff */
        if (!strncmp(parser->bufp, "<!--", 4)) {
                parser->bufp += 4;
                while ((p = strstr(parser->bufp, "-->")) == NULL)
@@ -651,14 +686,20 @@ static void html_get_parenthesis(HTMLParser *parser, gchar *buf, gint len)
                parser->bufp = p + 3;
                return;
        }
-       /* ignore css stuff */
-       if (!strncmp(parser->bufp, "<STYLE type=text/css>", 21)) {
-               parser->bufp += 21;
-               while ((p = strstr(parser->bufp, "</STYLE>")) == NULL)
+       if (!g_strncasecmp(parser->bufp, "<style", 6)) {
+               parser->bufp += 6;
+               while ((p = strcasestr(parser->bufp, "</style>")) == NULL)
                        if (html_read_line(parser) == HTML_EOF) return;
                parser->bufp = p + 8;
                return;
        }
+       if (!g_strncasecmp(parser->bufp, "<script", 7)) {
+               parser->bufp += 7;
+               while ((p = strcasestr(parser->bufp, "</script>")) == NULL)
+                       if (html_read_line(parser) == HTML_EOF) return;
+               parser->bufp = p + 9;
+               return;
+       }
 
        parser->bufp++;
        while ((p = strchr(parser->bufp, '>')) == NULL)