fix CID 1596595: Resource leaks, and CID 1596594: (CHECKED_RETURN)
[claws.git] / src / plugins / rssyl / parse822.c
1 /*
2  * Claws Mail -- a GTK based, lightweight, and fast e-mail client
3  * Copyright (C) 1999-2004 Hiroyuki Yamamoto
4  * This file (C) 2005 Andrej Kacian <andrej@kacian.sk>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19  */
20
21 #ifdef HAVE_CONFIG_H
22 #  include "config.h"
23 #endif
24
25 /* Global includes */
26 #include <sys/stat.h>
27 #include <glib.h>
28 #include <pthread.h>
29
30 /* Claws Mail includes */
31 #include <common/claws.h>
32 #include <procheader.h>
33 #include <common/utils.h>
34 #include <main.h>
35
36 /* Local includes */
37 #include "libfeed/feed.h"
38 #include "libfeed/feeditem.h"
39 #include "libfeed/date.h"
40 #include "parse822.h"
41 #include "rssyl_feed.h"
42 #include "rssyl_parse_feed.h"
43 #include "strutils.h"
44
45 /* rssyl_parse_folder_item_file()
46  *
47  * Parse a RFC822-formatted feed item given by "path", and returns a
48  * pointer to a newly-allocated FeedItem struct, which contains all required data.
49  *
50  */
51 FeedItem *rssyl_parse_folder_item_file(gchar *path)
52 {
53         gchar *contents, **lines, **line, **splid, *tmp, *tmp2;
54         GError *error = NULL;
55         FeedItem *item;
56         RFeedCtx *ctx;
57         gint i = 0;
58         GString *body = NULL;
59         gboolean parsing_headers = TRUE, past_html_tag = FALSE, past_endhtml_tag = FALSE;
60         gboolean started_author = FALSE, started_subject = FALSE;
61         gboolean started_link = FALSE, started_clink = FALSE, got_original_title = FALSE;
62
63         debug_print("RSSyl: parsing '%s'\n", path);
64
65         if( !g_file_get_contents(path, &contents, NULL, &error) ) {
66                 g_warning("error: '%s'", error->message);
67                 g_error_free(error);
68         }
69
70         if( contents != NULL ) {
71                 lines = strsplit_no_copy(contents, '\n');
72         } else {
73                 g_warning("badly formatted file found, ignoring: '%s'", path);
74                 return NULL;
75         }
76
77         ctx = g_new0(RFeedCtx, 1);
78         ctx->path = g_strdup(path); /* store filesystem path to source file */
79         ctx->last_seen = 0;
80
81         item = feed_item_new(NULL);
82         item->data = ctx;
83
84         while( lines[i] ) {
85                 if( parsing_headers && lines[i] && !strlen(lines[i]) ) {
86                         parsing_headers = FALSE;
87                         debug_print("RSSyl: finished parsing headers\n");
88                 }
89
90                 if( parsing_headers ) {
91                         line = g_strsplit(lines[i], ": ", 2);
92                         if( line[0] && line[1] && strlen(line[0]) && lines[i][0] != ' ') {
93                                 started_author = FALSE;
94                                 started_subject = FALSE;
95                                 started_link = FALSE;
96                                 started_clink = FALSE;
97
98                                 /* Author */
99                                 if( !strcmp(line[0], "From") ) {
100                                         feed_item_set_author(item, line[1]);
101                                         debug_print("RSSyl: got author '%s'\n", feed_item_get_author(item));
102                                         started_author = TRUE;
103                                 }
104
105                                 /* Date (set both FeedItem timestamps) */
106                                 if( !strcmp(line[0], "Date") ) {
107                                         feed_item_set_date_modified(item,
108                                                         procheader_date_parse(NULL, line[1], 0));
109                                         feed_item_set_date_published(item,
110                                                         feed_item_get_date_modified(item));
111                                         debug_print("RSSyl: got date \n" );
112                                 }
113
114                                 /* Title */
115                                 if( !strcmp(line[0], "Subject") && !got_original_title ) {
116                                         feed_item_set_title(item,line[1]);
117                                         debug_print("RSSyl: got title '%s'\n", feed_item_get_title(item));
118                                         started_subject = TRUE;
119                                 }
120
121                                 /* Original (including HTML) title - Atom feeds */
122                                 if( !strcmp(line[0], "X-RSSyl-OrigTitle") ) {
123                                         feed_item_set_title(item, line[1]);
124                                         debug_print("RSSyl: got original title '%s'\n",
125                                                         feed_item_get_title(item));
126                                         got_original_title = TRUE;
127                                 }
128
129                                 /* URL */
130                                 if( !strcmp(line[0], "X-RSSyl-URL") ) {
131                                         feed_item_set_url(item, line[1]);
132                                         debug_print("RSSyl: got link '%s'\n", feed_item_get_url(item));
133                                         started_link = TRUE;
134                                 }
135
136                                 /* Last-Seen timestamp */
137                                 if( !strcmp(line[0], "X-RSSyl-Last-Seen") ) {
138                                         ctx->last_seen = atol(line[1]);
139                                         debug_print("RSSyl: got last_seen timestamp %"CM_TIME_FORMAT"\n", ctx->last_seen);
140                                 }
141
142                                 /* ID */
143                                 if( !strcmp(line[0], "Message-ID") ) {
144                                         if (line[1][0] != '<' || line[1][strlen(line[1])-1] != '>') {
145                                                 debug_print("RSSyl: malformed Message-ID, ignoring...\n");
146                                         } else {
147                                                 /* Get the ID from within < and >. */
148                                                 tmp = line[1] + 1;
149                                                 tmp2 = g_strndup(tmp, strlen(tmp) - 1);
150                                                 feed_item_set_id(item, tmp2);
151                                                 g_free(tmp2);
152                                         }
153                                 }
154
155                                 /* Feed comments */
156                                 if( !strcmp(line[0], "X-RSSyl-Comments") ) {
157                                         feed_item_set_comments_url(item, line[1]);
158                                         debug_print("RSSyl: got clink '%s'\n", feed_item_get_comments_url(item));
159                                         started_clink = TRUE;
160                                 }
161
162                                 /* References */
163                                 if( !strcmp(line[0], "References") ) {
164                                         splid = g_strsplit_set(line[1], "<>", 3);
165                                         if( strlen(splid[1]) != 0 )
166                                                 feed_item_set_parent_id(item, line[1]);
167                                         g_strfreev(splid);
168                                 }
169
170                         } else if (lines[i][0] == ' ') {
171                                 gchar *tmp = NULL;
172                                 /* continuation line */
173                                 if (started_author) {
174                                         tmp = g_strdup_printf("%s %s", feed_item_get_author(item), lines[i]+1);
175                                         feed_item_set_author(item, tmp);
176                                         debug_print("RSSyl: updated author to '%s'\n", tmp);
177                                         g_free(tmp);
178                                 } else if (started_subject) {
179                                         tmp = g_strdup_printf("%s %s", feed_item_get_title(item), lines[i]+1);
180                                         feed_item_set_title(item, tmp);
181                                         debug_print("RSSyl: updated title to '%s'\n", tmp);
182                                         g_free(tmp);
183                                 } else if (started_link) {
184                                         tmp = g_strdup_printf("%s%s", feed_item_get_url(item), lines[i]+1);
185                                         feed_item_set_url(item, tmp);
186                                         debug_print("RSSyl: updated link to '%s'\n", tmp);
187                                         g_free(tmp);
188                                 } else if (started_clink) {
189                                         tmp = g_strdup_printf("%s%s", feed_item_get_comments_url(item), lines[i]+1);
190                                         feed_item_set_comments_url(item, tmp);
191                                         debug_print("RSSyl: updated comments_link to '%s'\n", tmp);
192                                 }
193                         }
194                         g_strfreev(line);
195                 } else {
196                         if( !strcmp(lines[i], RSSYL_TEXT_START) ) {
197                                 debug_print("RSSyl: Leading html tag found at line %d\n", i);
198                                 past_html_tag = TRUE;
199                                 if (body)
200                                 {
201                                         g_warning("unexpected leading html tag found at line %d", i);
202                                     g_string_free(body, TRUE);
203                                 }
204                                 body = g_string_new("");
205                                 i++;
206                                 continue;
207                         }
208                         while( past_html_tag && !past_endhtml_tag && lines[i] ) {
209                                 if( !strcmp(lines[i], RSSYL_TEXT_END) ) {
210                                         debug_print("RSSyl: Trailing html tag found at line %d\n", i);
211                                         past_endhtml_tag = TRUE;
212                                         continue;
213                                 }
214
215                                 if (body->len > 0)
216                                         body = g_string_append_c(body, '\n');
217                                 body = g_string_append(body, lines[i]);
218
219                                 i++;
220                         }
221
222                 }
223
224                 i++;
225         }
226
227         if (body != NULL ) {
228                 if (past_html_tag && past_endhtml_tag && body->str != NULL)
229                         feed_item_set_text(item, body->str);
230                 g_string_free(body, TRUE);
231         }
232
233         g_free(lines);
234         g_free(contents);
235         return item;
236 }
237
238 static void rssyl_flush_folder_func(gpointer data, gpointer user_data)
239 {
240         FeedItem *item = (FeedItem *)data;
241         RFeedCtx *ctx = (RFeedCtx *)item->data;
242
243         if( ctx != NULL && ctx->path != NULL) {
244                 g_free(ctx->path);
245         }
246         feed_item_free(item);
247 }
248
249 static void rssyl_folder_read_existing_real(RFolderItem *ritem)
250 {
251         gchar *path = NULL, *fname = NULL;
252         GDir *dp;
253         const gchar *d;
254         GError *error = NULL;
255         gint num;
256         FeedItem *item = NULL;
257         RFeedCtx *ctx;
258
259         g_return_if_fail(ritem != NULL);
260
261         path = folder_item_get_path(&ritem->item);
262         g_return_if_fail(path != NULL);
263
264         debug_print("RSSyl: reading existing items from '%s'\n", path);
265
266         /* Flush contents if any, so we can add new */
267         if( g_slist_length(ritem->items) > 0 ) {
268                 g_slist_foreach(ritem->items, (GFunc)rssyl_flush_folder_func, NULL);
269                 g_slist_free(ritem->items);
270         }
271         ritem->items = NULL;
272         ritem->last_update = 0;
273
274         if( (dp = g_dir_open(path, 0, &error)) == NULL ) {
275                 debug_print("g_dir_open on \"%s\" failed with error %d (%s)\n",
276                                 path, error->code, error->message);
277                 g_error_free(error);
278                 g_free(path);
279                 return;
280         }
281
282         while( (d = g_dir_read_name(dp)) != NULL ) {
283                 if( claws_is_exiting() ) {
284                         g_dir_close(dp);
285                         g_free(path);
286                         return;
287                 }
288
289                 if( d[0] != '.' && (num = to_number(d)) > 0 ) {
290                         fname = g_strdup_printf("%s%c%s", path, G_DIR_SEPARATOR, d);
291                         if (!g_file_test(fname, G_FILE_TEST_IS_REGULAR)) {
292                                 debug_print("RSSyl: not a regular file: '%s', ignoring it\n", fname);
293                                 g_free(fname);
294                                 continue;
295                         }
296
297                         debug_print("RSSyl: starting to parse '%s'\n", d);
298                         if( (item = rssyl_parse_folder_item_file(fname)) != NULL ) {
299                                 /* Find latest timestamp */
300                                 ctx = (RFeedCtx *)item->data;
301                                 if( ritem->last_update < ctx->last_seen )
302                                         ritem->last_update = ctx->last_seen;
303                                 debug_print("RSSyl: Appending '%s'\n", feed_item_get_title(item));
304                                 ritem->items = g_slist_prepend(ritem->items, item);
305                         }
306                         g_free(fname);
307                 }
308         }
309
310         g_dir_close(dp);
311         g_free(path);
312
313         ritem->items = g_slist_reverse(ritem->items);
314 }
315
#ifdef USE_PTHREAD
/* Thread entry point: "arg" is an RParseCtx. Reads the existing items of
 * the folder named in the context, then sets the context's "ready" flag.
 * Always returns NULL. */
static void *rssyl_read_existing_thr(void *arg)
{
	RParseCtx *parse_ctx = (RParseCtx *)arg;

	rssyl_folder_read_existing_real(parse_ctx->ritem);

	/* Tell the waiting caller that the work is finished. */
	parse_ctx->ready = TRUE;

	return NULL;
}
#endif
326
327 void rssyl_folder_read_existing(RFolderItem *ritem)
328 {
329 #ifdef USE_PTHREAD
330         RParseCtx *ctx;
331         pthread_t pt;
332 #endif
333
334         g_return_if_fail(ritem != NULL);
335
336
337 #ifdef USE_PTHREAD
338         ctx = g_new0(RParseCtx, 1);
339         ctx->ritem = ritem;
340         ctx->ready = FALSE;
341
342         if( pthread_create(&pt, NULL, rssyl_read_existing_thr,
343                                 (void *)ctx) != 0 ) {
344                 /* Couldn't create thread, let's continue non-threaded. */
345                 rssyl_folder_read_existing_real(ritem);
346         } else {
347                 /* Thread started, wait until it is done. */
348                 debug_print("RSSyl: waiting for thread to finish\n");
349                 while( !ctx->ready ) {
350                         claws_do_idle();
351                 }
352
353                 debug_print("RSSyl: thread finished\n");
354                 pthread_join(pt, NULL);
355         }
356
357         g_free(ctx);
358 #else
359         rssyl_folder_read_existing_real(ritem);
360 #endif
361 }