2 * Copyright (C) 2006 Andrej Kacian <andrej@kacian.sk>
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public
15 * License along with this program; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 02111-1307, USA.
27 #include <procheader.h>
33 #include "parser_atom10.h"
37 FEED_LOC_ATOM10_ENTRY,
38 FEED_LOC_ATOM10_AUTHOR,
39 FEED_LOC_ATOM10_SOURCE,
40 FEED_LOC_ATOM10_CONTENT
41 } FeedAtom10Locations;
/* Handles the start of an XML element while parsing an Atom 1.0 feed.
 *
 * data: parser context; it is cast to FeedParserCtx * below.
 * el:   name of the element being opened.
 * attr: attributes of the element; looked up by name through
 *       feed_parser_get_attribute_value(), and walked directly as a
 *       NULL-terminated list of name/value pairs in the XHTML branch.
 *
 * What is done depends on ctx->depth:
 *   depth == 1: "entry", "author" and "link" are recognised,
 *   depth == 2: "author", "link", "source", "title" and "content"
 *               are recognised,
 *   depth >= 3: opening tags inside XHTML content are re-serialized
 *               into ctx->xhtml_str. */
43 void feed_parser_atom10_start(void *data, const gchar *el, const gchar **attr)
45 FeedParserCtx *ctx = (FeedParserCtx *)data;
48 if( ctx->depth == 1 ) {
50 if( !strcmp(el, "entry") ) {
51 /* Start of new feed item found.
52 * Create a new FeedItem, freeing the one we already have, if any. */
53 if( ctx->curitem != NULL )
54 feed_item_free(ctx->curitem);
55 ctx->curitem = feed_item_new(ctx->feed);
56 ctx->location = FEED_LOC_ATOM10_ENTRY;
57 } else if( !strcmp(el, "author") ) {
58 /* Start of author info for the feed found.
59 * Set correct location. */
60 ctx->location = FEED_LOC_ATOM10_AUTHOR;
61 } else if( !strcmp(el, "link") ) {
/* Only a <link> that carries no "rel" attribute is treated as the
 * feed's own link; links with a "rel" attribute are ignored here. */
62 if (!feed_parser_get_attribute_value(attr, "rel")) {
63 /* Link tag for the feed */
/* The old link is released before the "href" value is duplicated. */
64 g_free(ctx->feed->link);
66 g_strdup(feed_parser_get_attribute_value(attr, "href"));
/* Any other element at this depth resets the location. */
68 } else ctx->location = FEED_LOC_ATOM10_NONE;
70 } else if( ctx->depth == 2 ) {
72 /* Make sure we are in one of known locations within the XML structure.
73 * This condition should never be true on a valid Atom feed. */
74 if (ctx->location != FEED_LOC_ATOM10_AUTHOR &&
75 ctx->location != FEED_LOC_ATOM10_ENTRY) {
80 if( !strcmp(el, "author") ) {
81 /* Start of author info for current feed item.
82 * Set correct location. */
83 ctx->location = FEED_LOC_ATOM10_AUTHOR;
84 } else if( !strcmp(el, "link") ) {
85 /* Capture item URL, from the "href" XML attribute. */
/* NOTE(review): the previous ctx->curitem->url is not freed before being
 * overwritten, unlike ctx->feed->link above; an entry with more than
 * one <link> would presumably leak the earlier copy - confirm. */
86 if (ctx->curitem && ctx->location == FEED_LOC_ATOM10_ENTRY)
87 ctx->curitem->url = g_strdup(feed_parser_get_attribute_value(attr, "href"));
88 } else if( !strcmp(el, "source") ) {
89 ctx->location = FEED_LOC_ATOM10_SOURCE;
/* Every other element at this depth puts the location back to ENTRY. */
90 } else ctx->location = FEED_LOC_ATOM10_ENTRY;
/* Independently of the location chain above, record how the item's
 * title is encoded, based on its "type" attribute. A missing "type"
 * is treated the same as "text". */
92 if( !strcmp(el, "title") && ctx->curitem != NULL) {
93 a = feed_parser_get_attribute_value(attr, "type");
94 if( !a || !strcmp(a, "text") )
95 ctx->curitem->title_format = FEED_ITEM_TITLE_TEXT;
96 else if( !strcmp(a, "html") )
97 ctx->curitem->title_format = FEED_ITEM_TITLE_HTML;
98 else if( !strcmp(a, "xhtml") )
99 ctx->curitem->title_format = FEED_ITEM_TITLE_XHTML;
101 ctx->curitem->title_format = FEED_ITEM_TITLE_UNKNOWN;
102 } else if (!strcmp(el, "content") && ctx->curitem != NULL) {
103 ctx->location = FEED_LOC_ATOM10_CONTENT;
104 a = feed_parser_get_attribute_value(attr, "type");
/* For XHTML content, flag the item and start a fresh buffer that the
 * depth >= 3 branch below appends nested tags to. */
105 if (a && !strcmp(a, "xhtml")) {
106 ctx->curitem->xhtml_content = TRUE;
107 ctx->xhtml_str = g_string_new(NULL);
110 } else if (ctx->depth >= 3) {
/* Nested elements are only of interest while inside XHTML content of
 * the current item; their opening tags are rebuilt as text. */
111 if (ctx->location == FEED_LOC_ATOM10_CONTENT
112 && ctx->curitem != NULL
113 && ctx->curitem->xhtml_content) {
115 GString *txt = ctx->xhtml_str;
116 g_string_append_c(txt, '<');
117 g_string_append(txt, el);
/* attr is consumed two entries at a time (name, then value) until a
 * NULL name or a NULL value is reached.
 * NOTE(review): values are written verbatim between single quotes; a
 * value containing ' or markup characters is not escaped - confirm
 * whether escaping is needed for the consumers of this string. */
119 for (i = 0; attr[i] != NULL && attr[i+1] != NULL; i += 2) {
120 g_string_append_printf(txt, " %s='%s'", attr[i], attr[i+1]);
122 g_string_append_c(txt, '>');
130 void feed_parser_atom10_end(void *data, const gchar *el)
132 FeedParserCtx *ctx = (FeedParserCtx *)data;
133 Feed *feed = ctx->feed;
134 gchar *text = NULL, *tmp;
136 if( ctx->str != NULL )
137 text = g_strstrip(g_strdup(ctx->str->str));
141 switch( ctx->depth ) {
149 if( !strcmp(el, "feed") ) {
150 /* We have finished parsing the feed, reverse the list
151 * so it's not upside down. */
152 feed->items = g_slist_reverse(ctx->feed->items);
159 /* decide if we just received </entry>, so we can
160 * add a complete item to feed */
161 if( !strcmp(el, "entry") ) {
163 /* Fix up URL, if it is relative */
164 if (ctx->curitem->url != NULL &&
165 !strstr(ctx->curitem->url, "://") &&
166 ctx->feed->link != NULL) {
167 tmp = g_strconcat(ctx->feed->link,
168 (ctx->curitem->url[0] == '/' ? "" : "/"),
169 ctx->curitem->url, NULL);
170 feed_item_set_url(ctx->curitem, tmp);
174 /* append the complete feed item */
175 if( ctx->curitem->id && ctx->curitem->title
176 && ctx->curitem->date_modified ) {
178 g_slist_prepend(feed->items, (gpointer)ctx->curitem);
181 /* since it's in the linked list, lose this pointer */
184 } else if( !strcmp(el, "title") ) { /* so it wasn't end of item */
186 } else if( !strcmp(el, "summary" ) ) {
187 FILL(feed->description)
188 } else if( !strcmp(el, "updated" ) ) {
189 feed->date = procheader_date_parse(NULL, text, 0);
191 /* FIXME: add more later */
197 if( ctx->curitem == NULL )
200 switch(ctx->location) {
202 /* We're in feed/entry */
203 case FEED_LOC_ATOM10_ENTRY:
204 if( !strcmp(el, "title") ) {
205 FILL(ctx->curitem->title)
206 } else if( !strcmp(el, "summary") ) {
207 FILL(ctx->curitem->summary)
208 } else if( !strcmp(el, "id") ) {
209 FILL(ctx->curitem->id)
210 feed_item_set_id_permalink(ctx->curitem, TRUE);
211 } else if( !strcmp(el, "published") ) {
212 ctx->curitem->date_published = procheader_date_parse(NULL, text, 0);
213 } else if( !strcmp(el, "updated") ) {
214 ctx->curitem->date_modified = procheader_date_parse(NULL, text, 0);
219 /* We're in feed/author or about to leave feed/entry/author */
220 case FEED_LOC_ATOM10_AUTHOR:
221 if( !strcmp(el, "author" ) ) {
222 /* We just finished parsing <author> */
223 ctx->curitem->author = g_strdup_printf("%s%s%s%s%s",
224 ctx->name ? ctx->name : "",
225 ctx->name && ctx->mail ? " <" : ctx->mail ? "<" : "",
226 ctx->mail ? ctx->mail : "",
227 ctx->mail ? ">" : "",
228 !ctx->name && !ctx->mail ? "N/A" : "");
229 ctx->location = FEED_LOC_ATOM10_ENTRY;
230 } else if( !strcmp(el, "name") ) {
236 case FEED_LOC_ATOM10_CONTENT:
237 if( !strcmp(el, "content") ) {
238 if (ctx->curitem->xhtml_content) {
239 /* Just in case the <content> tag itself also has some
240 * content of its own, not just the <div> it should,
241 * let's append it to the end. */
242 g_string_append(ctx->xhtml_str, text);
243 ctx->curitem->text = g_string_free(ctx->xhtml_str, FALSE);
244 ctx->xhtml_str = NULL;
246 FILL(ctx->curitem->text)
248 ctx->location = FEED_LOC_ATOM10_ENTRY;
257 if( ctx->curitem == NULL )
260 switch(ctx->location) {
262 /* We're in feed/entry/author */
263 case FEED_LOC_ATOM10_AUTHOR:
264 if( !strcmp(el, "name") ) {
266 } else if( !strcmp(el, "email") ) {
272 /* We're in feed/entry/source */
273 case FEED_LOC_ATOM10_SOURCE:
274 if( !strcmp(el, "title" ) ) {
275 FILL(ctx->curitem->sourcetitle)
276 } else if( !strcmp(el, "id" ) ) {
277 FILL(ctx->curitem->sourceid)
278 } else if( !strcmp(el, "updated" ) ) {
279 ctx->curitem->sourcedate = procheader_date_parse(NULL, text, 0);
284 case FEED_LOC_ATOM10_CONTENT:
285 if (ctx->curitem->xhtml_content) {
286 g_string_append(ctx->xhtml_str, text);
287 g_string_append_printf(ctx->xhtml_str, "</%s>", el);
297 if (ctx->location == FEED_LOC_ATOM10_CONTENT
298 && ctx->curitem->xhtml_content) {
299 g_string_append(ctx->xhtml_str, text);
300 g_string_append_printf(ctx->xhtml_str, "</%s>", el);
305 if( ctx->str != NULL ) {
307 g_string_free(ctx->str, TRUE);