RSSyl: Stop earlier when an invalid feed is encountered.
[claws.git] / src / plugins / rssyl / libfeed / feed.c
1 /*
2  * Copyright (C) 2006 Andrej Kacian <andrej@kacian.sk>
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License as
6  * published by the Free Software Foundation; either version 2 of the
7  * License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public
15  * License along with this program; if not, write to the
16  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17  * Boston, MA 02111-1307, USA.
18  */
19
20 #define __USE_GNU
21
22 #include <stdlib.h>
23 #include <glib.h>
24 #include <curl/curl.h>
25 #include <expat.h>
26
27 #include "feed.h"
28 #include "parser.h"
29
30 /* feed_new()
31  * Initializes new Feed struct, setting its url and a default timeout. */
32 Feed *feed_new(gchar *url)
33 {
34         Feed *feed = NULL;
35
36         g_return_val_if_fail(url != NULL, NULL);
37
38         feed = malloc( sizeof(Feed) );
39         g_return_val_if_fail(feed != NULL, NULL);
40
41         feed->is_valid = TRUE;
42         feed->timeout = FEED_DEFAULT_TIMEOUT;
43         feed->url = g_strdup(url);
44         feed->auth = NULL;
45         feed->title = NULL;
46         feed->description = NULL;
47         feed->language = NULL;
48         feed->author = NULL;
49         feed->generator = NULL;
50         feed->link = NULL;
51         feed->items = NULL;
52
53         feed->fetcherr = NULL;
54         feed->cookies_path = NULL;
55
56         feed->ssl_verify_peer = TRUE;
57         feed->cacert_file = NULL;
58
59         return feed;
60 }
61
62 static void _free_items(gpointer item, gpointer nada)
63 {
64         feed_item_free(item);
65 }
66
67 static void _free_auth(Feed *feed)
68 {
69         if (feed == NULL)
70                 return;
71
72         if (feed->auth != NULL) {
73                 if (feed->auth->username != NULL)
74                         g_free(feed->auth->username);
75                 if (feed->auth->password != NULL)
76                         g_free(feed->auth->password);
77                 g_free(feed->auth);
78                 feed->auth = NULL;
79         }
80 }
81
82 void feed_free(Feed *feed)
83 {
84         if( feed == NULL )
85                 return; /* Return silently, without printing a glib error. */
86
87         g_free(feed->url);
88         _free_auth(feed);
89         g_free(feed->title);
90         g_free(feed->description);
91         g_free(feed->language);
92         g_free(feed->author);
93         g_free(feed->generator);
94         g_free(feed->link);
95         g_free(feed->fetcherr);
96         g_free(feed->cookies_path);
97         g_free(feed->cacert_file);
98
99         if( feed->items != NULL ) {
100                 g_slist_foreach(feed->items, _free_items, NULL);
101                 g_slist_free(feed->items);
102         }
103
104         g_free(feed);
105         feed = NULL;
106 }
107
108 void feed_free_items(Feed *feed)
109 {
110         if( feed == NULL )
111                 return;
112
113         if( feed->items != NULL ) {
114                 g_slist_foreach(feed->items, _free_items, NULL);
115                 g_slist_free(feed->items);
116                 feed->items = NULL;
117         }
118 }
119
120 /* Timeout */
121 void feed_set_timeout(Feed *feed, guint timeout)
122 {
123         g_return_if_fail(feed != NULL);
124         feed->timeout = timeout;
125 }
126
127 guint feed_get_timeout(Feed *feed)
128 {
129         g_return_val_if_fail(feed != NULL, 0);
130         return feed->timeout;
131 }
132
133 /* URL */
134 void feed_set_url(Feed *feed, gchar *url)
135 {
136         g_return_if_fail(feed != NULL);
137         g_return_if_fail(url != NULL);
138
139         if( feed->url != NULL ) {
140                 g_free(feed->url);
141                 feed->url = NULL;
142         }
143
144         feed->url = g_strdup(url);
145 }
146
147 gchar *feed_get_url(Feed *feed)
148 {
149         g_return_val_if_fail(feed != NULL, NULL);
150         return feed->url;
151 }
152
153 /* Auth */
154 void feed_set_auth(Feed *feed, FeedAuth *auth)
155 {
156         g_return_if_fail(feed != NULL);
157         g_return_if_fail(auth != NULL);
158
159         _free_auth(feed);
160         feed->auth = g_new0(FeedAuth, 1);
161         feed->auth->type = auth->type;
162         feed->auth->username = g_strdup(auth->username);
163         feed->auth->password = g_strdup(auth->password);
164 }
165
166 FeedAuth *feed_get_auth(Feed *feed)
167 {
168         g_return_val_if_fail(feed != NULL, NULL);
169         return feed->auth;
170 }
171
172 /* Title */
173 gchar *feed_get_title(Feed *feed)
174 {
175         g_return_val_if_fail(feed != NULL, NULL);
176         return feed->title;
177 }
178
179 void feed_set_title(Feed *feed, gchar *new_title)
180 {
181         g_return_if_fail(feed != NULL);
182         g_return_if_fail(new_title != NULL);
183
184         if (feed->title != NULL) {
185                 g_free(feed->title);
186                 feed->title = NULL;
187         }
188
189         feed->title = g_strdup(new_title);
190 }
191
192 /* Description */
193 gchar *feed_get_description(Feed *feed)
194 {
195         g_return_val_if_fail(feed != NULL, NULL);
196         return feed->description;
197 }
198
199 /* Language */
200 gchar *feed_get_language(Feed *feed)
201 {
202         g_return_val_if_fail(feed != NULL, NULL);
203         return feed->language;
204 }
205
206 /* Author */
207 gchar *feed_get_author(Feed *feed)
208 {
209         g_return_val_if_fail(feed != NULL, NULL);
210         return feed->author;
211 }
212
213 /* Generator */
214 gchar *feed_get_generator(Feed *feed)
215 {
216         g_return_val_if_fail(feed != NULL, NULL);
217         return feed->generator;
218 }
219
220 /* Fetch error (if not NULL, supplied by libcurl) */
221 gchar *feed_get_fetcherror(Feed *feed)
222 {
223         g_return_val_if_fail(feed != NULL, NULL);
224         return feed->fetcherr;
225 }
226
227 /* Returns number of items currently in the feed. */
228 gint feed_n_items(Feed *feed)
229 {
230         g_return_val_if_fail(feed != NULL, -1);
231
232         if( feed->items == NULL )       /* No items here. */
233                 return 0;
234
235         return g_slist_length(feed->items);
236 }
237
238 /* Returns nth item from feed. */
239 FeedItem *feed_nth_item(Feed *feed, guint n)
240 {
241         g_return_val_if_fail(feed != NULL, NULL);
242
243         return g_slist_nth_data(feed->items, n);
244 }
245
246 /* feed_update()
247  * Takes initialized feed with url set, fetches the feed from this url,
248  * updates rest of Feed struct members and returns HTTP response code
249  * we got from url's server. */
250 guint feed_update(Feed *feed, time_t last_update)
251 {
252         CURL *eh = NULL;
253         CURLcode res;
254         FeedParserCtx *feed_ctx = NULL;
255         glong response_code = 0;
256
257         g_return_val_if_fail(feed != NULL, FEED_ERR_NOFEED);
258         g_return_val_if_fail(feed->url != NULL, FEED_ERR_NOURL);
259
260         /* Init curl before anything else. */
261         eh = curl_easy_init();
262
263         g_return_val_if_fail(eh != NULL, FEED_ERR_INIT);
264
265         /* Curl initialized, create parser context now. */
266         feed_ctx = malloc( sizeof(FeedParserCtx) );
267
268         feed_ctx->parser = XML_ParserCreate(NULL);
269         feed_ctx->depth = 0;
270         feed_ctx->str = NULL;
271         feed_ctx->xhtml_str = NULL;
272         feed_ctx->feed = feed;
273         feed_ctx->location = 0;
274         feed_ctx->curitem = NULL;
275         feed_ctx->id_is_permalink = TRUE;
276
277         feed_ctx->name = NULL;
278         feed_ctx->mail = NULL;
279
280         /* Set initial expat handlers, which will take care of choosing
281          * correct parser later. */
282         feed_parser_set_expat_handlers(feed_ctx);
283
284         curl_easy_setopt(eh, CURLOPT_URL, feed->url);
285         curl_easy_setopt(eh, CURLOPT_NOPROGRESS, 1);
286 #ifdef CURLOPT_MUTE
287         curl_easy_setopt(eh, CURLOPT_MUTE, 1);
288 #endif
289         curl_easy_setopt(eh, CURLOPT_WRITEFUNCTION, feed_writefunc);
290         curl_easy_setopt(eh, CURLOPT_WRITEDATA, feed_ctx);
291         curl_easy_setopt(eh, CURLOPT_FOLLOWLOCATION, 1);
292         curl_easy_setopt(eh, CURLOPT_MAXREDIRS, 3);
293         curl_easy_setopt(eh, CURLOPT_TIMEOUT, feed->timeout);
294         curl_easy_setopt(eh, CURLOPT_NOSIGNAL, 1);
295         curl_easy_setopt(eh, CURLOPT_ENCODING, "");
296         curl_easy_setopt(eh, CURLOPT_USERAGENT, "libfeed 0.1");
297         curl_easy_setopt(eh, CURLOPT_NETRC, CURL_NETRC_OPTIONAL);
298
299         /* Use HTTP's If-Modified-Since feature, if application provided
300          * the timestamp of last update. */
301         if( last_update != -1 ) {
302                 curl_easy_setopt(eh, CURLOPT_TIMECONDITION,
303                                 CURL_TIMECOND_IFMODSINCE);
304                 curl_easy_setopt(eh, CURLOPT_TIMEVALUE, (long)last_update);
305         }
306
307 #if LIBCURL_VERSION_NUM >= 0x070a00
308         if (feed->ssl_verify_peer == FALSE) {
309                 curl_easy_setopt(eh, CURLOPT_SSL_VERIFYPEER, 0);
310                 curl_easy_setopt(eh, CURLOPT_SSL_VERIFYHOST, 0);
311         }
312 #endif
313
314         if (feed->cacert_file != NULL)
315                 curl_easy_setopt(eh, CURLOPT_CAINFO, feed->cacert_file);
316
317         if(feed->cookies_path != NULL)
318                 curl_easy_setopt(eh, CURLOPT_COOKIEFILE, feed->cookies_path);
319
320         if (feed->auth != NULL) {
321                 switch (feed->auth->type) {
322                 case FEED_AUTH_NONE:
323                         break;
324                 case FEED_AUTH_BASIC:
325                         curl_easy_setopt(eh, CURLOPT_HTTPAUTH, CURLAUTH_BASIC);
326                         curl_easy_setopt(eh, CURLOPT_USERNAME,
327                                          feed->auth->username);
328                         curl_easy_setopt(eh, CURLOPT_PASSWORD,
329                                          feed->auth->password);
330                         break;
331                 default:
332                         response_code = FEED_ERR_UNAUTH; /* unknown auth */
333                         goto cleanup;
334                 }
335         }
336
337         res = curl_easy_perform(eh);
338         XML_Parse(feed_ctx->parser, "", 0, TRUE);
339
340         if( res != CURLE_OK ) {
341                 feed->fetcherr = g_strdup(curl_easy_strerror(res));
342                 response_code = FEED_ERR_FETCH;
343         } else if (!feed->is_valid) {
344                 response_code = FEED_ERR_NOFEED;
345         } else {
346                 curl_easy_getinfo(eh, CURLINFO_RESPONSE_CODE, &response_code);
347         }
348
349 cleanup:
350         curl_easy_cleanup(eh);
351
352         /* Cleanup, we should be done. */
353         XML_ParserFree(feed_ctx->parser);
354         g_free(feed_ctx->name);
355         g_free(feed_ctx->mail);
356         if (feed_ctx->str != NULL)
357                 g_string_free(feed_ctx->str, TRUE);
358         if (feed_ctx->xhtml_str != NULL)
359                 g_string_free(feed_ctx->xhtml_str, TRUE);
360         g_free(feed_ctx);
361
362         return response_code;
363 }
364
365 void feed_foreach_item(Feed *feed, GFunc func, gpointer data)
366 {
367         g_return_if_fail(feed != NULL);
368         g_return_if_fail(feed->items != NULL);
369
370         g_slist_foreach(feed->items, func, data);
371 }
372
373 gboolean feed_prepend_item(Feed *feed, FeedItem *item)
374 {
375         g_return_val_if_fail(feed != NULL, FALSE);
376         g_return_val_if_fail(item != NULL, FALSE);
377
378         feed->items = g_slist_prepend(feed->items, item);
379         return TRUE;
380 }
381
382 gboolean feed_append_item(Feed *feed, FeedItem *item)
383 {
384         g_return_val_if_fail(feed != NULL, FALSE);
385         g_return_val_if_fail(item != NULL, FALSE);
386
387         feed->items = g_slist_append(feed->items, item);
388         return TRUE;
389 }
390
391 gboolean feed_insert_item(Feed *feed, FeedItem *item, gint pos)
392 {
393         g_return_val_if_fail(feed != NULL, FALSE);
394         g_return_val_if_fail(item != NULL, FALSE);
395         g_return_val_if_fail(pos < 0, FALSE);
396
397         feed->items = g_slist_insert(feed->items, item, pos);
398         return TRUE;
399 }
400
401 gchar *feed_get_cookies_path(Feed *feed)
402 {
403         g_return_val_if_fail(feed != NULL, NULL);
404         return feed->cookies_path;
405 }
406
407 void feed_set_cookies_path(Feed *feed, gchar *path)
408 {
409         g_return_if_fail(feed != NULL);
410
411         if( feed->cookies_path != NULL ) {
412                 g_free(feed->cookies_path);
413                 feed->cookies_path = NULL;
414         }
415
416         feed->cookies_path = (path != NULL ? g_strdup(path) : NULL);
417 }
418
419 gboolean feed_get_ssl_verify_peer(Feed *feed)
420 {
421         g_return_val_if_fail(feed != NULL, FALSE);
422         return feed->ssl_verify_peer;
423 }
424
425 void feed_set_ssl_verify_peer(Feed *feed, gboolean ssl_verify_peer)
426 {
427         g_return_if_fail(feed != NULL);
428         feed->ssl_verify_peer = ssl_verify_peer;
429 }
430
431 gchar *feed_get_cacert_file(Feed *feed)
432 {
433         g_return_val_if_fail(feed != NULL, NULL);
434         return feed->cacert_file;
435 }
436
437 void feed_set_cacert_file(Feed *feed, const gchar *path)
438 {
439         g_return_if_fail(feed != NULL);
440
441         if( feed->cacert_file != NULL ) {
442                 g_free(feed->cacert_file);
443                 feed->cacert_file = NULL;
444         }
445
446         feed->cacert_file = (path != NULL ? g_strdup(path) : NULL);
447 }