RSSyl: Ignore rel="..." feed link in Atom parser.
[claws.git] / src / plugins / rssyl / libfeed / parser_atom10.c
1 /*
2  * Copyright (C) 2006 Andrej Kacian <andrej@kacian.sk>
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License as
6  * published by the Free Software Foundation; either version 2 of the
7  * License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public
15  * License along with this program; if not, write to the
16  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17  * Boston, MA 02111-1307, USA.
18  */
19 #define __USE_GNU
20
21 #include <glib.h>
22 #include <expat.h>
23 #include <string.h>
24 #include <stdio.h>
25
26 #include "feed.h"
27 #include "feeditem.h"
28 #include "date.h"
29 #include "parser.h"
30 #include "parser_atom10.h"
31
32 void feed_parser_atom10_start(void *data, const gchar *el, const gchar **attr)
33 {
34         FeedParserCtx *ctx = (FeedParserCtx *)data;
35         gchar *a = NULL;
36
37         if( ctx->depth == 1 ) {
38
39                 if( !strcmp(el, "entry") ) {
40                         /* Start of new feed item found.
41                          * Create a new FeedItem, freeing the one we already have, if any. */
42                         if( ctx->curitem != NULL )
43                                 feed_item_free(ctx->curitem);
44                         ctx->curitem = feed_item_new(ctx->feed);
45                         ctx->location = FEED_LOC_ATOM10_ENTRY;
46                 } else if( !strcmp(el, "author") ) {
47                         /* Start of author info for the feed found.
48                          * Set correct location. */
49                         ctx->location = FEED_LOC_ATOM10_AUTHOR;
50                 } else if( !strcmp(el, "link") ) {
51                         if (!feed_parser_get_attribute_value(attr, "rel")) {
52                                 /* Link tag for the feed */
53                                 g_free(ctx->feed->link);
54                                 ctx->feed->link =
55                                         g_strdup(feed_parser_get_attribute_value(attr, "href"));
56                         }
57                 } else ctx->location = FEED_LOC_ATOM10_NONE;
58
59         } else if( ctx->depth == 2 ) {
60
61                 /* Make sure we are in one of known locations within the XML structure.
62                  * This condition should never be true on a valid Atom feed. */
63                 if (ctx->location != FEED_LOC_ATOM10_AUTHOR &&
64                                 ctx->location != FEED_LOC_ATOM10_ENTRY) {
65                         ctx->depth++;
66                         return;
67                 }
68
69                 if( !strcmp(el, "author") ) {
70                         /* Start of author info for current feed item.
71                          * Set correct location. */
72                         ctx->location = FEED_LOC_ATOM10_AUTHOR;
73                 } else if( !strcmp(el, "link") ) {
74                         /* Capture item URL, from the "url" XML attribute. */
75                         if (ctx->curitem && ctx->location == FEED_LOC_ATOM10_ENTRY)
76                   ctx->curitem->url = g_strdup(feed_parser_get_attribute_value(attr, "href"));
77                 } else if( !strcmp(el, "source") ) {
78                         ctx->location = FEED_LOC_ATOM10_SOURCE;
79                 } else ctx->location = FEED_LOC_ATOM10_ENTRY;
80
81                 if( !strcmp(el, "title") ) {
82                         a = feed_parser_get_attribute_value(attr, "type");
83                         if( !a || !strcmp(a, "text") )
84                                 ctx->curitem->title_format = FEED_ITEM_TITLE_TEXT;
85                         else if( !strcmp(a, "html") )
86                                 ctx->curitem->title_format = FEED_ITEM_TITLE_HTML;
87                         else if( !strcmp(a, "xhtml") )
88                                 ctx->curitem->title_format = FEED_ITEM_TITLE_XHTML;
89                         else
90                                 ctx->curitem->title_format = FEED_ITEM_TITLE_UNKNOWN;
91                 } else if (!strcmp(el, "content") ) {
92                         a = feed_parser_get_attribute_value(attr, "type");
93                         if (a && !strcmp(a, "xhtml")) {
94                                 ctx->curitem->xhtml_content = TRUE;
95                                 ctx->location = FEED_LOC_ATOM10_CONTENT;
96                         }
97                 }
98         }
99
100         ctx->depth++;
101 }
102
103 void feed_parser_atom10_end(void *data, const gchar *el)
104 {
105         FeedParserCtx *ctx = (FeedParserCtx *)data;
106         Feed *feed = ctx->feed;
107         gchar *text = NULL, *tmp;
108
109         if( ctx->str != NULL )
110                 text = ctx->str->str;
111         else
112                 text = "";
113
114         switch( ctx->depth ) {
115
116                 case 0:
117                         /* Just in case. */
118                         break;
119
120                 case 1:
121
122                         if( !strcmp(el, "feed") ) {
123                                 /* We have finished parsing the feed, reverse the list
124                                  * so it's not upside down. */
125                                 feed->items = g_slist_reverse(ctx->feed->items);
126                         }
127
128                         break;
129
130                 case 2:
131
132                         /* decide if we just received </entry>, so we can
133                          * add a complete item to feed */
134                         if( !strcmp(el, "entry") ) {
135
136                                 /* Fix up URL, if it is relative */
137                                 if (ctx->curitem->url != NULL &&
138                                                 !strstr("://", ctx->curitem->url) &&
139                                                 ctx->feed->link != NULL) {
140                                         tmp = g_strconcat(ctx->feed->link,
141                                                         (ctx->curitem->url[0] == '/' ? "" : "/"),
142                                                         ctx->curitem->url, NULL);
143                                         feed_item_set_url(ctx->curitem, tmp);
144                                         g_free(tmp);
145                                 }
146
147                                 /* append the complete feed item */
148                                 if( ctx->curitem->id && ctx->curitem->title
149                                                 && ctx->curitem->date_modified ) {
150                                         feed->items = 
151                                                 g_slist_prepend(feed->items, (gpointer)ctx->curitem);
152                                 }
153                                 
154                                 /* since it's in the linked list, lose this pointer */
155                                 ctx->curitem = NULL;
156
157                         } else if( !strcmp(el, "title") ) {     /* so it wasn't end of item */
158                                 FILL(feed->title)
159                         } else if( !strcmp(el, "summary" ) ) {
160                                 FILL(feed->description)
161                         } else if( !strcmp(el, "updated" ) ) {
162                                 feed->date = parseISO8601Date(text);
163                         }
164                         /* FIXME: add more later */
165
166                         break;
167
168                 case 3:
169
170                         if( ctx->curitem == NULL )
171                                 break;
172
173                         switch(ctx->location) {
174
175                                 /* We're in feed/entry */
176                                 case FEED_LOC_ATOM10_ENTRY:
177                                         if( !strcmp(el, "title") ) {
178                                                 FILL(ctx->curitem->title)
179                                         } else if( !strcmp(el, "summary") ) {
180                                                 FILL(ctx->curitem->summary)
181                                         } else if( !strcmp(el, "content") ) {
182                                                 if (!ctx->curitem->xhtml_content)
183                                                         FILL(ctx->curitem->text)
184                                         } else if( !strcmp(el, "id") ) {
185                                                 FILL(ctx->curitem->id)
186                                                 feed_item_set_id_permalink(ctx->curitem, TRUE);
187                                         } else if( !strcmp(el, "published") ) {
188                                                 ctx->curitem->date_published = parseISO8601Date(text);
189                                         } else if( !strcmp(el, "updated") ) {
190                                                 ctx->curitem->date_modified = parseISO8601Date(text);
191                                         }
192
193                                         break;
194
195                                 /* We're in feed/author or about to leave feed/entry/author */
196                                 case FEED_LOC_ATOM10_AUTHOR:
197                                         if( !strcmp(el, "author" ) ) {
198                                                 /* We just finished parsing <author> */
199                                                 ctx->curitem->author = g_strdup_printf("%s%s%s%s%s",
200                                                                 ctx->name ? ctx->name : "",
201                                                                 ctx->name && ctx->mail ? " <" : ctx->mail ? "<" : "",
202                                                                 ctx->mail ? ctx->mail : "",
203                                                                 ctx->mail ? ">" : "",
204                                                                 !ctx->name && !ctx->mail ? "N/A" : "");
205                                                 ctx->location = FEED_LOC_ATOM10_ENTRY;
206                                         } else if( !strcmp(el, "name") ) {
207                                                 FILL(feed->author)
208                                         }
209
210                                         break;
211                         }
212
213                         break;
214
215                 case 4:
216
217                         if( ctx->curitem == NULL )
218                                 break;
219
220                         switch(ctx->location) {
221
222                                 /* We're in feed/entry/author */
223                                 case FEED_LOC_ATOM10_AUTHOR:
224                                         if( !strcmp(el, "name") ) {
225                                                 FILL(ctx->name)
226                                         } else if( !strcmp(el, "email") ) {
227                                                 FILL(ctx->mail)
228                                         }
229
230                                         break;
231
232                                 /* We're in feed/entry/source */
233                                 case FEED_LOC_ATOM10_SOURCE:
234                                         if( !strcmp(el, "title" ) ) {
235                                                 FILL(ctx->curitem->sourcetitle)
236                                         } else if( !strcmp(el, "id" ) ) {
237                                                 FILL(ctx->curitem->sourceid)
238                                         } else if( !strcmp(el, "updated" ) ) {
239                                                 ctx->curitem->sourcedate = parseISO8601Date(text);
240                                         }
241
242                                         break;
243
244                                 case FEED_LOC_ATOM10_CONTENT:
245                                         if (!strcmp(el, "div") && ctx->curitem->xhtml_content)
246                                                 FILL(ctx->curitem->text)
247                                         break;
248
249                                 }
250
251
252                         break;
253         }
254
255         if( ctx->str != NULL ) {
256                 g_string_free(ctx->str, TRUE);
257                 ctx->str = NULL;
258         }
259         ctx->str = NULL;
260
261         ctx->depth--;
262 }