Fix memory leak when freeing RSSyl's FeedItem struct.
[claws.git] / src / plugins / rssyl / parse822.c
1 /*
2  * Claws-Mail-- a GTK+ based, lightweight, and fast e-mail client
3  * Copyright (C) 1999-2004 Hiroyuki Yamamoto
4  * This file (C) 2005 Andrej Kacian <andrej@kacian.sk>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19  */
20
21 #ifdef HAVE_CONFIG_H
22 #  include "config.h"
23 #endif
24
25 /* Global includes */
26 #include <sys/stat.h>
27 #include <glib.h>
28 #include <pthread.h>
29
30 /* Claws Mail includes */
31 #include <common/claws.h>
32 #include <procheader.h>
33 #include <common/utils.h>
34 #include <main.h>
35
36 /* Local includes */
37 #include "libfeed/feed.h"
38 #include "libfeed/feeditem.h"
39 #include "libfeed/date.h"
40 #include "parse822.h"
41 #include "rssyl_feed.h"
42 #include "rssyl_parse_feed.h"
43 #include "strutils.h"
44
45 /* rssyl_parse_folder_item_file()
46  *
47  * Parse a RFC822-formatted feed item given by "path", and returns a
48  * pointer to a newly-allocated FeedItem struct, which contains all required data.
49  *
50  */
51 FeedItem *rssyl_parse_folder_item_file(gchar *path)
52 {
53         gchar *contents, **lines, **line, **splid, *tmp, *tmp2;
54         GError *error = NULL;
55         FeedItem *item;
56         RFeedCtx *ctx;
57         gint i = 0;
58         gboolean parsing_headers = TRUE, past_html_tag = FALSE, past_endhtml_tag = FALSE;
59         gboolean started_author = FALSE, started_subject = FALSE;
60         gboolean started_link = FALSE, started_clink = FALSE, got_original_title = FALSE;
61
62         debug_print("RSSyl: parsing '%s'\n", path);
63
64         g_file_get_contents(path, &contents, NULL, &error);
65
66         if( error ) {
67                 g_warning("GError: '%s'", error->message);
68                 g_error_free(error);
69         }
70
71         if( contents != NULL ) {
72                 lines = strsplit_no_copy(contents, '\n');
73         } else {
74                 g_warning("Badly formatted file found, ignoring: '%s'", path);
75                 return NULL;
76         }
77
78         ctx = g_new0(RFeedCtx, 1);
79         ctx->path = g_strdup(path); /* store filesystem path to source file */
80         ctx->last_seen = 0;
81
82         item = feed_item_new(NULL);
83         item->data = ctx;
84
85         while( lines[i] ) {
86                 if( parsing_headers && lines[i] && !strlen(lines[i]) ) {
87                         parsing_headers = FALSE;
88                         debug_print("RSSyl: finished parsing headers\n");
89                 }
90
91                 if( parsing_headers ) {
92                         line = g_strsplit(lines[i], ": ", 2);
93                         if( line[0] && line[1] && strlen(line[0]) && lines[i][0] != ' ') {
94                                 started_author = FALSE;
95                                 started_subject = FALSE;
96                                 started_link = FALSE;
97                                 started_clink = FALSE;
98
99                                 /* Author */
100                                 if( !strcmp(line[0], "From") ) {
101                                         feed_item_set_author(item, line[1]);
102                                         debug_print("RSSyl: got author '%s'\n", feed_item_get_author(item));
103                                         started_author = TRUE;
104                                 }
105
106                                 /* Date */
107                                 if( !strcmp(line[0], "Date") ) {
108                                         feed_item_set_date_modified(item,
109                                                         procheader_date_parse(NULL, line[1], 0));
110                                         debug_print("RSSyl: got date \n" );
111                                 }
112
113                                 /* Title */
114                                 if( !strcmp(line[0], "Subject") && !got_original_title ) {
115                                         feed_item_set_title(item,line[1]);
116                                         debug_print("RSSyl: got title '%s'\n", feed_item_get_title(item));
117                                         started_subject = TRUE;
118                                 }
119
120                                 /* Original (including HTML) title - Atom feeds */
121                                 if( !strcmp(line[0], "X-RSSyl-OrigTitle") ) {
122                                         feed_item_set_title(item, line[1]);
123                                         debug_print("RSSyl: got original title '%s'\n",
124                                                         feed_item_get_title(item));
125                                         got_original_title = TRUE;
126                                 }
127
128                                 /* URL */
129                                 if( !strcmp(line[0], "X-RSSyl-URL") ) {
130                                         feed_item_set_url(item, line[1]);
131                                         debug_print("RSSyl: got link '%s'\n", feed_item_get_url(item));
132                                         started_link = TRUE;
133                                 }
134
135                                 /* Last-Seen timestamp */
136                                 if( !strcmp(line[0], "X-RSSyl-Last-Seen") ) {
137                                         ctx->last_seen = atol(line[1]);
138                                         debug_print("RSSyl: got last_seen timestamp %lld\n", (long long)ctx->last_seen);
139                                 }
140
141                                 /* ID */
142                                 if( !strcmp(line[0], "Message-ID") ) {
143                                         if (line[1][0] != '<' || line[1][strlen(line[1])-1] != '>') {
144                                                 debug_print("RSSyl: malformed Message-ID, ignoring...\n");
145                                         } else {
146                                                 /* Get the ID from within < and >. */
147                                                 tmp = line[1] + 1;
148                                                 tmp2 = g_strndup(tmp, strlen(tmp) - 1);
149                                                 feed_item_set_id(item, tmp2);
150                                                 g_free(tmp2);
151                                         }
152                                 }
153
154                                 /* Feed comments */
155                                 if( !strcmp(line[0], "X-RSSyl-Comments") ) {
156                                         feed_item_set_comments_url(item, line[1]);
157                                         debug_print("RSSyl: got clink '%s'\n", feed_item_get_comments_url(item));
158                                         started_clink = TRUE;
159                                 }
160
161                                 /* References */
162                                 if( !strcmp(line[0], "References") ) {
163                                         splid = g_strsplit_set(line[1], "<>", 3);
164                                         if( strlen(splid[1]) != 0 )
165                                                 feed_item_set_parent_id(item, line[1]);
166                                         g_strfreev(splid);
167                                 }
168
169                         } else if (lines[i][0] == ' ') {
170                                 gchar *tmp = NULL;
171                                 /* continuation line */
172                                 if (started_author) {
173                                         tmp = g_strdup_printf("%s %s", feed_item_get_author(item), lines[i]+1);
174                                         feed_item_set_author(item, tmp);
175                                         debug_print("RSSyl: updated author to '%s'\n", tmp);
176                                         g_free(tmp);
177                                 } else if (started_subject) {
178                                         tmp = g_strdup_printf("%s %s", feed_item_get_title(item), lines[i]+1);
179                                         feed_item_set_title(item, tmp);
180                                         debug_print("RSSyl: updated title to '%s'\n", tmp);
181                                         g_free(tmp);
182                                 } else if (started_link) {
183                                         tmp = g_strdup_printf("%s%s", feed_item_get_url(item), lines[i]+1);
184                                         feed_item_set_url(item, tmp);
185                                         debug_print("RSSyl: updated link to '%s'\n", tmp);
186                                         g_free(tmp);
187                                 } else if (started_clink) {
188                                         tmp = g_strdup_printf("%s%s", feed_item_get_comments_url(item), lines[i]+1);
189                                         feed_item_set_comments_url(item, tmp);
190                                         debug_print("RSSyl: updated comments_link to '%s'\n", tmp);
191                                 }
192                         }
193                         g_strfreev(line);
194                 } else {
195                         if( !strcmp(lines[i], RSSYL_TEXT_START) ) {
196                                 debug_print("RSSyl: Leading html tag found at line %d\n", i);
197                                 past_html_tag = TRUE;
198                                 i++;
199                                 continue;
200                         }
201                         while( past_html_tag && !past_endhtml_tag && lines[i] ) {
202                                 if( !strcmp(lines[i], RSSYL_TEXT_END) ) {
203                                         debug_print("RSSyl: Trailing html tag found at line %d\n", i);
204                                         past_endhtml_tag = TRUE;
205                                         i++;
206                                         continue;
207                                 }
208                                 if( feed_item_get_text(item) != NULL ) {
209                                         gint e_len, n_len;
210                                         e_len = strlen(item->text);
211                                         n_len = strlen(lines[i]);
212                                         item->text = g_realloc(item->text, e_len + n_len + 2);
213                                         *(item->text+e_len) = '\n';
214                                         strcpy(item->text+e_len+1, lines[i]);
215                                         *(item->text+e_len+n_len+1) = '\0';
216                                 } else {
217                                         item->text = g_strdup(lines[i]);
218                                 }
219                                 i++;
220                         }
221
222                         if( lines[i] == NULL )
223                                 return item;
224                 }
225
226                 i++;
227         }
228         g_free(lines);
229         g_free(contents);
230         return item;
231 }
232
233 static void rssyl_flush_folder_func(gpointer data, gpointer user_data)
234 {
235         FeedItem *item = (FeedItem *)data;
236         RFeedCtx *ctx = (RFeedCtx *)item->data;
237
238         if( ctx != NULL && ctx->path != NULL) {
239                 g_free(ctx->path);
240                 g_free(ctx);
241         }
242         feed_item_free(item);
243 }
244
245 static void rssyl_folder_read_existing_real(RFolderItem *ritem)
246 {
247         gchar *path = NULL, *fname = NULL;
248         GDir *dp;
249         const gchar *d;
250         GError *error = NULL;
251         gint num;
252         FeedItem *item = NULL;
253         RFeedCtx *ctx;
254
255         g_return_if_fail(ritem != NULL);
256
257         path = folder_item_get_path(&ritem->item);
258         g_return_if_fail(path != NULL);
259
260         debug_print("RSSyl: reading existing items from '%s'\n", path);
261
262         /* Flush contents if any, so we can add new */
263         if( g_slist_length(ritem->items) > 0 ) {
264                 g_slist_foreach(ritem->items, (GFunc)rssyl_flush_folder_func, NULL);
265                 g_slist_free(ritem->items);
266         }
267         ritem->items = NULL;
268         ritem->last_update = 0;
269
270         if( (dp = g_dir_open(path, 0, &error)) == NULL ) {
271                 debug_print("g_dir_open on \"%s\" failed with error %d (%s)\n",
272                                 path, error->code, error->message);
273                 g_error_free(error);
274                 g_free(path);
275                 return;
276         }
277
278         while( (d = g_dir_read_name(dp)) != NULL ) {
279                 if( claws_is_exiting() ) {
280                         g_dir_close(dp);
281                         g_free(path);
282                         return;
283                 }
284
285                 if( d[0] != '.' && (num = to_number(d)) > 0 ) {
286                         fname = g_strdup_printf("%s%c%s", path, G_DIR_SEPARATOR, d);
287                         if (!g_file_test(fname, G_FILE_TEST_IS_REGULAR)) {
288                                 debug_print("RSSyl: not a regular file: '%s', ignoring it\n", fname);
289                                 g_free(fname);
290                                 continue;
291                         }
292
293                         debug_print("RSSyl: starting to parse '%s'\n", d);
294                         if( (item = rssyl_parse_folder_item_file(fname)) != NULL ) {
295                                 /* Find latest timestamp */
296                                 ctx = (RFeedCtx *)item->data;
297                                 if( ritem->last_update < ctx->last_seen )
298                                         ritem->last_update = ctx->last_seen;
299                                 debug_print("RSSyl: Appending '%s'\n", feed_item_get_title(item));
300                                 ritem->items = g_slist_prepend(ritem->items, item);
301                         }
302                         g_free(fname);
303                 }
304         }
305
306         g_dir_close(dp);
307         g_free(path);
308
309         ritem->items = g_slist_reverse(ritem->items);
310 }
311
312 #ifdef USE_PTHREAD
313 static void *rssyl_read_existing_thr(void *arg)
314 {
315         RParseCtx *ctx = (RParseCtx *)arg;
316
317         rssyl_folder_read_existing_real(ctx->ritem);
318         ctx->ready = TRUE;
319         return NULL;
320 }
321 #endif
322
323 void rssyl_folder_read_existing(RFolderItem *ritem)
324 {
325 #ifdef USE_PTHREAD
326         RParseCtx *ctx;
327         pthread_t pt;
328 #endif
329
330         g_return_if_fail(ritem != NULL);
331
332
333 #ifdef USE_PTHREAD
334         ctx = g_new0(RParseCtx, 1);
335         ctx->ritem = ritem;
336         ctx->ready = FALSE;
337
338         if( pthread_create(&pt, PTHREAD_CREATE_JOINABLE, rssyl_read_existing_thr,
339                                 (void *)ctx) != 0 ) {
340                 /* Couldn't create thread, let's continue non-threaded. */
341                 rssyl_folder_read_existing_real(ritem);
342         } else {
343                 /* Thread started, wait until it is done. */
344                 debug_print("RSSyl: waiting for thread to finish\n");
345                 while( !ctx->ready ) {
346                         claws_do_idle();
347                 }
348
349                 debug_print("RSSyl: thread finished\n");
350                 pthread_join(pt, NULL);
351         }
352
353         g_free(ctx);
354 #else
355         rssyl_folder_read_existing_real(ritem);
356 #endif
357 }