2 * Copyright (C) 2006 Andrej Kacian <andrej@kacian.sk>
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public
15 * License along with this program; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 02111-1307, USA.
30 #include "parser_atom10.h"
/* Set by the start handler when an "entry" element opens. */
34 FEED_LOC_ATOM10_ENTRY,
/* Set by the start handler when an "author" element opens. */
35 FEED_LOC_ATOM10_AUTHOR,
/* Set by the start handler when a "source" element opens at depth 2. */
36 FEED_LOC_ATOM10_SOURCE,
/* Set by the start handler when a "content" element declares type "xhtml". */
37 FEED_LOC_ATOM10_CONTENT
38 } FeedAtom10Locations;
/*
 * Element-start handler for Atom 1.0 documents.
 *
 * Uses the element name together with ctx->depth and ctx->location to
 * update the parser state held in the FeedParserCtx passed through `data`:
 * it begins a new current item on "entry", tracks the author/source/content
 * locations, picks up feed and item "href" links, and records the declared
 * type of item titles and content.
 *
 * data: opaque pointer that is cast to FeedParserCtx *.
 * el:   name of the element being opened.
 * attr: attribute list, queried through feed_parser_get_attribute_value().
 *
 * NOTE(review): the signature looks like an XML parser start-element
 * callback; confirm where it is registered.
 */
40 void feed_parser_atom10_start(void *data, const gchar *el, const gchar **attr)
42 FeedParserCtx *ctx = (FeedParserCtx *)data;
45 if( ctx->depth == 1 ) {
47 if( !strcmp(el, "entry") ) {
48 /* Start of new feed item found.
49 * Create a new FeedItem, freeing the one we already have, if any. */
50 if( ctx->curitem != NULL )
51 feed_item_free(ctx->curitem);
52 ctx->curitem = feed_item_new(ctx->feed);
53 ctx->location = FEED_LOC_ATOM10_ENTRY;
54 } else if( !strcmp(el, "author") ) {
55 /* Start of author info for the feed found.
56 * Set correct location. */
57 ctx->location = FEED_LOC_ATOM10_AUTHOR;
58 } else if( !strcmp(el, "link") ) {
/* Only a link element without a "rel" attribute is used as the feed link. */
59 if (!feed_parser_get_attribute_value(attr, "rel")) {
60 /* Link tag for the feed */
61 g_free(ctx->feed->link);
63 g_strdup(feed_parser_get_attribute_value(attr, "href"));
/* Any other element at this depth resets the location. */
65 } else ctx->location = FEED_LOC_ATOM10_NONE;
67 } else if( ctx->depth == 2 ) {
69 /* Make sure we are in one of known locations within the XML structure.
70 * This condition should never be true on a valid Atom feed. */
71 if (ctx->location != FEED_LOC_ATOM10_AUTHOR &&
72 ctx->location != FEED_LOC_ATOM10_ENTRY) {
77 if( !strcmp(el, "author") ) {
78 /* Start of author info for current feed item.
79 * Set correct location. */
80 ctx->location = FEED_LOC_ATOM10_AUTHOR;
81 } else if( !strcmp(el, "link") ) {
82 /* Capture item URL, from the "href" XML attribute. */
/* NOTE(review): the url field is assigned without freeing any previous
 * value, and "rel" is not inspected here -- confirm that an entry with
 * several link elements cannot leak or pick the wrong link. */
83 if (ctx->curitem && ctx->location == FEED_LOC_ATOM10_ENTRY)
84 ctx->curitem->url = g_strdup(feed_parser_get_attribute_value(attr, "href"));
85 } else if( !strcmp(el, "source") ) {
86 ctx->location = FEED_LOC_ATOM10_SOURCE;
/* Every other element name at this depth puts the location back to ENTRY. */
87 } else ctx->location = FEED_LOC_ATOM10_ENTRY;
/* Record the declared "type" of a title; a missing type is treated the same
 * as "text".
 * NOTE(review): ctx->curitem is dereferenced in this branch and in the
 * "content" branch without the NULL check used for "link" above -- confirm
 * it cannot be NULL here. */
89 if( !strcmp(el, "title") ) {
90 a = feed_parser_get_attribute_value(attr, "type");
91 if( !a || !strcmp(a, "text") )
92 ctx->curitem->title_format = FEED_ITEM_TITLE_TEXT;
93 else if( !strcmp(a, "html") )
94 ctx->curitem->title_format = FEED_ITEM_TITLE_HTML;
95 else if( !strcmp(a, "xhtml") )
96 ctx->curitem->title_format = FEED_ITEM_TITLE_XHTML;
98 ctx->curitem->title_format = FEED_ITEM_TITLE_UNKNOWN;
99 } else if (!strcmp(el, "content") ) {
100 a = feed_parser_get_attribute_value(attr, "type");
/* Only content of type "xhtml" is flagged and switches the location to
 * CONTENT; other types leave the location as set above. */
101 if (a && !strcmp(a, "xhtml")) {
102 ctx->curitem->xhtml_content = TRUE;
103 ctx->location = FEED_LOC_ATOM10_CONTENT;
/*
 * Element-end handler for Atom 1.0 documents.
 *
 * Takes a stripped copy of the text held in ctx->str and, depending on
 * ctx->depth, ctx->location and the element name, hands the matching feed
 * or item field to FILL() or sets a date field from parseISO8601Date(text).
 * When an "entry" element ends, a relative item URL is joined to the feed
 * link and the item is prepended to feed->items if it has an id, a title
 * and a modification date.
 *
 * data: opaque pointer that is cast to FeedParserCtx *.
 * el:   name of the element being closed.
 *
 * NOTE(review): FILL() is a macro defined outside this block; it presumably
 * stores a copy of `text` in the named field -- confirm against its
 * definition.
 */
111 void feed_parser_atom10_end(void *data, const gchar *el)
113 FeedParserCtx *ctx = (FeedParserCtx *)data;
114 Feed *feed = ctx->feed;
115 gchar *text = NULL, *tmp;
/* Work on a private, whitespace-stripped copy of the text in ctx->str;
 * `text` stays NULL when there is no string buffer. */
117 if( ctx->str != NULL )
118 text = g_strstrip(g_strdup(ctx->str->str));
/* The meaning of an end tag is decided by the current depth. */
122 switch( ctx->depth ) {
130 if( !strcmp(el, "feed") ) {
131 /* We have finished parsing the feed, reverse the list
132 * so it's not upside down. */
133 feed->items = g_slist_reverse(ctx->feed->items);
140 /* decide if we just received </entry>, so we can
141 * add a complete item to feed */
142 if( !strcmp(el, "entry") ) {
144 /* Fix up URL, if it is relative */
/* A URL counts as relative when it contains no "://". It is appended to
 * the feed link, with a "/" inserted unless the URL already starts with
 * one. Nothing is done when the feed has no link. */
145 if (ctx->curitem->url != NULL &&
146 !strstr(ctx->curitem->url, "://") &&
147 ctx->feed->link != NULL) {
148 tmp = g_strconcat(ctx->feed->link,
149 (ctx->curitem->url[0] == '/' ? "" : "/"),
150 ctx->curitem->url, NULL);
151 feed_item_set_url(ctx->curitem, tmp);
155 /* append the complete feed item */
/* "Complete" means id, title and date_modified are all set. The item is
 * prepended, which is why the list is reversed when "feed" ends. */
156 if( ctx->curitem->id && ctx->curitem->title
157 && ctx->curitem->date_modified ) {
159 g_slist_prepend(feed->items, (gpointer)ctx->curitem);
162 /* since it's in the linked list, lose this pointer */
165 } else if( !strcmp(el, "title") ) { /* so it wasn't end of item */
167 } else if( !strcmp(el, "summary" ) ) {
168 FILL(feed->description)
169 } else if( !strcmp(el, "updated" ) ) {
170 feed->date = parseISO8601Date(text);
172 /* FIXME: add more later */
/* The location cases that follow all dereference ctx->curitem. */
178 if( ctx->curitem == NULL )
181 switch(ctx->location) {
183 /* We're in feed/entry */
184 case FEED_LOC_ATOM10_ENTRY:
185 if( !strcmp(el, "title") ) {
186 FILL(ctx->curitem->title)
187 } else if( !strcmp(el, "summary") ) {
188 FILL(ctx->curitem->summary)
189 } else if( !strcmp(el, "content") ) {
/* XHTML content is not taken here; it is picked up when a "div" ends in
 * the FEED_LOC_ATOM10_CONTENT case instead. */
190 if (!ctx->curitem->xhtml_content)
191 FILL(ctx->curitem->text)
192 } else if( !strcmp(el, "id") ) {
193 FILL(ctx->curitem->id)
194 feed_item_set_id_permalink(ctx->curitem, TRUE);
195 } else if( !strcmp(el, "published") ) {
196 ctx->curitem->date_published = parseISO8601Date(text);
197 } else if( !strcmp(el, "updated") ) {
198 ctx->curitem->date_modified = parseISO8601Date(text);
203 /* We're in feed/author or about to leave feed/entry/author */
204 case FEED_LOC_ATOM10_AUTHOR:
205 if( !strcmp(el, "author" ) ) {
206 /* We just finished parsing <author> */
/* The resulting string is "name <mail>", "name", "<mail>", or "N/A" when
 * neither ctx->name nor ctx->mail is set.
 * NOTE(review): any previous author value is overwritten without being
 * freed here -- confirm it is always NULL at this point. */
207 ctx->curitem->author = g_strdup_printf("%s%s%s%s%s",
208 ctx->name ? ctx->name : "",
209 ctx->name && ctx->mail ? " <" : ctx->mail ? "<" : "",
210 ctx->mail ? ctx->mail : "",
211 ctx->mail ? ">" : "",
212 !ctx->name && !ctx->mail ? "N/A" : "");
/* Author info is done; continue as a plain entry. */
213 ctx->location = FEED_LOC_ATOM10_ENTRY;
214 } else if( !strcmp(el, "name") ) {
225 if( ctx->curitem == NULL )
228 switch(ctx->location) {
230 /* We're in feed/entry/author */
231 case FEED_LOC_ATOM10_AUTHOR:
232 if( !strcmp(el, "name") ) {
234 } else if( !strcmp(el, "email") ) {
240 /* We're in feed/entry/source */
241 case FEED_LOC_ATOM10_SOURCE:
242 if( !strcmp(el, "title" ) ) {
243 FILL(ctx->curitem->sourcetitle)
244 } else if( !strcmp(el, "id" ) ) {
245 FILL(ctx->curitem->sourceid)
246 } else if( !strcmp(el, "updated" ) ) {
247 ctx->curitem->sourcedate = parseISO8601Date(text);
/* XHTML content: the item text is taken when a "div" element ends. */
252 case FEED_LOC_ATOM10_CONTENT:
253 if (!strcmp(el, "div") && ctx->curitem->xhtml_content)
254 FILL(ctx->curitem->text)
/* Release the string buffer that was used for this element. */
263 if( ctx->str != NULL ) {
265 g_string_free(ctx->str, TRUE);