#include <string.h>
#include <stdio.h>
+#include <procheader.h>
+
#include "feed.h"
#include "feeditem.h"
#include "date.h"
#include "parser.h"
#include "parser_atom10.h"
+enum { /* Parser positions for Atom 1.0; these constants are stored in and compared against ctx->location. */
+ FEED_LOC_ATOM10_NONE,
+ FEED_LOC_ATOM10_ENTRY, /* position in which a <link> element's href is taken as the item URL */
+ FEED_LOC_ATOM10_AUTHOR, /* presumably inside an <author> element; not exercised in the code shown here -- TODO confirm */
+ FEED_LOC_ATOM10_SOURCE, /* set when a <source> element starts */
+ FEED_LOC_ATOM10_CONTENT /* set when a <content> element starts while an item is being built */
+} FeedAtom10Locations; /* NOTE(review): there is no typedef, so this declares an object of the unnamed enum type, not a type name -- confirm that is intended. */
+
/* Handles the start of an XML element for the Atom 1.0 parser.
 *
 * data: parser state, cast to FeedParserCtx.
 * el:   name of the element being opened.
 * attr: the element's attributes, read below as name/value pairs
 *       terminated by NULL.
 *
 * Presumably registered as the XML parser's start-tag handler -- TODO
 * confirm against the caller.
 *
 * NOTE(review): this listing is a patch excerpt. Lines beginning with
 * a plus or minus sign are additions or removals, and parts of the
 * body between the hunks are not shown. */
void feed_parser_atom10_start(void *data, const gchar *el, const gchar **attr)
{
FeedParserCtx *ctx = (FeedParserCtx *)data;
} else if( !strcmp(el, "link") ) {
/* Capture item URL, from the "href" XML attribute. */
if (ctx->curitem && ctx->location == FEED_LOC_ATOM10_ENTRY)
- ctx->curitem->url = g_strdup(feed_parser_get_attribute_value(attr, "href"));
+ ctx->curitem->url = g_strdup(feed_parser_get_attribute_value(attr, "href"));
} else if( !strcmp(el, "source") ) {
ctx->location = FEED_LOC_ATOM10_SOURCE;
} else ctx->location = FEED_LOC_ATOM10_ENTRY;
- if( !strcmp(el, "title") ) {
+ if( !strcmp(el, "title") && ctx->curitem != NULL) { /* guard added: curitem is dereferenced just below */
a = feed_parser_get_attribute_value(attr, "type");
if( !a || !strcmp(a, "text") )
ctx->curitem->title_format = FEED_ITEM_TITLE_TEXT;
ctx->curitem->title_format = FEED_ITEM_TITLE_XHTML;
else
ctx->curitem->title_format = FEED_ITEM_TITLE_UNKNOWN;
- } else if (!strcmp(el, "content") ) {
+ } else if (!strcmp(el, "content") && ctx->curitem != NULL) { /* guard added: curitem is dereferenced just below */
+ ctx->location = FEED_LOC_ATOM10_CONTENT; /* now set for every <content> element, not only when type is "xhtml" */
a = feed_parser_get_attribute_value(attr, "type");
if (a && !strcmp(a, "xhtml")) {
ctx->curitem->xhtml_content = TRUE;
- ctx->location = FEED_LOC_ATOM10_CONTENT;
+ ctx->xhtml_str = g_string_new(NULL); /* empty buffer; the opening tags of nested elements are appended to it below */
}
}
+ } else if (ctx->depth >= 3) { /* deeper elements: only acted on while inside XHTML content */
+ if (ctx->curitem->xhtml_content /* NOTE(review): curitem is not NULL-checked on this path -- confirm it cannot be NULL at depth >= 3 */
+ && ctx->location == FEED_LOC_ATOM10_CONTENT) {
+ guint i;
+ GString *txt = ctx->xhtml_str;
+ g_string_append_c(txt, '<'); /* re-create the opening tag as text */
+ g_string_append(txt, el);
+
+ for (i = 0; attr[i] != NULL && attr[i+1] != NULL; i += 2) { /* attr[i] is a name, attr[i+1] its value */
+ g_string_append_printf(txt, " %s='%s'", attr[i], attr[i+1]); /* NOTE(review): the value is written verbatim between single quotes with no escaping; a value containing a quote or markup character would yield malformed output -- confirm whether escaping is needed */
+ }
+ g_string_append_c(txt, '>');
+ }
}
+
ctx->depth++; /* incremented on every call, whichever branch ran above */
}
gchar *text = NULL, *tmp;
if( ctx->str != NULL )
- text = ctx->str->str;
+ text = g_strstrip(g_strdup(ctx->str->str));
else
text = "";
} else if( !strcmp(el, "summary" ) ) {
FILL(feed->description)
} else if( !strcmp(el, "updated" ) ) {
- feed->date = parseISO8601Date(text);
+ feed->date = procheader_date_parse(NULL, text, 0);
}
/* FIXME: add more later */
FILL(ctx->curitem->title)
} else if( !strcmp(el, "summary") ) {
FILL(ctx->curitem->summary)
- } else if( !strcmp(el, "content") ) {
- if (!ctx->curitem->xhtml_content)
- FILL(ctx->curitem->text)
} else if( !strcmp(el, "id") ) {
FILL(ctx->curitem->id)
feed_item_set_id_permalink(ctx->curitem, TRUE);
} else if( !strcmp(el, "published") ) {
- ctx->curitem->date_published = parseISO8601Date(text);
+ ctx->curitem->date_published = procheader_date_parse(NULL, text, 0);
} else if( !strcmp(el, "updated") ) {
- ctx->curitem->date_modified = parseISO8601Date(text);
+ ctx->curitem->date_modified = procheader_date_parse(NULL, text, 0);
}
break;
}
break;
- }
+ case FEED_LOC_ATOM10_CONTENT:
+ if( !strcmp(el, "content") ) {
+ if (ctx->curitem->xhtml_content) {
+ /* Just in case the <content> tag itself also has some
+ * content of its own, not just the <div> it should,
+ * let's append it to the end. */
+ g_string_append(ctx->xhtml_str, text);
+ ctx->curitem->text = g_string_free(ctx->xhtml_str, FALSE);
+ ctx->xhtml_str = NULL;
+ } else {
+ FILL(ctx->curitem->text)
+ }
+ ctx->location = FEED_LOC_ATOM10_ENTRY;
+ }
+
+ break;
+ }
break;
case 4:
} else if( !strcmp(el, "id" ) ) {
FILL(ctx->curitem->sourceid)
} else if( !strcmp(el, "updated" ) ) {
- ctx->curitem->sourcedate = parseISO8601Date(text);
+ ctx->curitem->sourcedate = procheader_date_parse(NULL, text, 0);
}
break;
case FEED_LOC_ATOM10_CONTENT:
- if (!strcmp(el, "div") && ctx->curitem->xhtml_content)
- FILL(ctx->curitem->text)
+ if (ctx->curitem->xhtml_content) {
+ g_string_append(ctx->xhtml_str, text);
+ g_string_append_printf(ctx->xhtml_str, "</%s>", el);
+ }
break;
}
break;
+
+ default:
+ if (ctx->location == FEED_LOC_ATOM10_CONTENT
+ && ctx->curitem->xhtml_content) {
+ g_string_append(ctx->xhtml_str, text);
+ g_string_append_printf(ctx->xhtml_str, "</%s>", el);
+ }
+ break;
}
if( ctx->str != NULL ) {
+ g_free(text);
g_string_free(ctx->str, TRUE);
ctx->str = NULL;
}