RSSyl: Fix some more issues reported by Coverity.
[claws.git] / src / plugins / rssyl / rssyl_add_item.c
1 /*
2  * Claws-Mail-- a GTK+ based, lightweight, and fast e-mail client
3  * Copyright (C) 1999-2004 Hiroyuki Yamamoto
4  * This file (C) 2005 Andrej Kacian <andrej@kacian.sk>
5  *
6  * - Adds parsed feed items to an RSSyl feed folder as message files
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21  */
22
23 #ifdef HAVE_CONFIG_H
24 #  include "config.h"
25 #endif
26
/* Global includes */
#include <glib.h>
#include <glib/gi18n.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>

/* Claws Mail includes */
#include <codeconv.h>
#include <procmsg.h>
#include <common/utils.h>

/* Local includes */
#include "libfeed/date.h"
#include "libfeed/feeditem.h"
#include "parse822.h"
#include "rssyl.h"
#include "rssyl_deleted.h"
#include "rssyl_feed.h"
#include "rssyl_parse_feed.h"
#include "strutils.h"
46
47 /* rssyl_cb_feed_compare()
48  *
49  * GCompareFunc function called by glib2's g_slist_find_custom().
50  */
51
52 static gint rssyl_cb_feed_compare(const FeedItem *a, const FeedItem *b)
53 {
54         gboolean date_eq = FALSE, url_eq = FALSE, title_eq = FALSE;
55         gboolean no_url = FALSE, no_date = FALSE, no_title = FALSE;
56         gchar *atit = NULL, *btit = NULL;
57
58         g_return_val_if_fail(a != NULL && b != NULL, 1);
59
60         /* ID should be unique. If it matches, we've found what we came for. */
61         if( (a->id != NULL) && (b->id != NULL) ) {
62                         if( !strcmp(a->id, b->id) ) {
63                                 return 0;
64                         }
65
66                         /* If both IDs are present, but they do not match, these are not the
67                          * droids we're looking for. */
68                         return 1;
69         }
70
71         /* Ok, we have no ID to aid us. Let's have a look at item timestamps
72          * and item title & url. */
73         if( (a->url != NULL) && (b->url != NULL) ) {
74                 if( !strcmp(a->url, b->url) )
75                         url_eq = TRUE;
76         } else
77                 no_url = TRUE;
78
79         if( (a->title != NULL) && (b->title != NULL) ) {
80                 atit = conv_unmime_header(a->title, CS_UTF_8, FALSE);
81                 btit = conv_unmime_header(b->title, CS_UTF_8, FALSE);
82                 if( !strcmp(atit, btit) )
83                         title_eq = TRUE;
84                 g_free(atit);
85                 g_free(btit);
86         } else
87                 no_title = TRUE;
88
89         /* If there's no 'published' timestamp for the item, we can only judge
90          * by item url - 'modified' timestamp can have changed if the item was
91          * updated recently. */
92         if( b->date_published <= 0 ) {
93                 if( b->date_modified > 0 ) {
94                         /* If the item has 'modified' timestamp, we can only rely on url
95                          * and title at this point. */
96                         if( (url_eq || no_url) && title_eq
97                                 && (a->date_modified >= b->date_modified) )
98                                 return 0;
99                         else
100                                 return 1;
101                 } else {
102                         /* No timestamp of any kind, we'll just assume if both title and url
103                          * match, we found the right item. Items in such feeds rarely change,
104                          * and if they do, there's no way we can really */
105                         if( (url_eq || no_url) && title_eq )
106                                 return 0;
107                         else
108                                 return 1;
109                 }
110         }
111
112         /* Check if 'published' or at least 'modified' timestamps match */
113         if( ((a->date_published > 0) && (b->date_published > 0) &&
114                         (a->date_published == b->date_published))
115                         || ((a->date_modified > 0) && (b->date_modified > 0) &&
116                         (a->date_modified == b->date_modified))) {
117                 date_eq = TRUE;
118         } else
119                 no_date = TRUE;
120
121         /* If 'published' time and item url match, it is reasonable to assume
122          * we found our item. */
123         if( (no_url || url_eq) && date_eq )
124                 return 0;
125
126         /* There is no timestamp and the url matches (or there is none),
127          * we need to compare titles, ... */
128         if( (no_url || url_eq) && no_date ) {
129                 if( title_eq )
130                         return 0;
131                 else
132                         return 1;
133         }
134
135         /* ... and as a last resort, if there is no title, item texts. */
136         if( no_title && a->text && b->text ) {
137                 if( !strcmp(a->text, b->text) )
138                         return 0;
139                 else
140                         return 1;
141         }
142
143         /* We don't know this item. */
144         return 1;
145 }
146
/* Result codes describing how two versions of a feed item differ.
 * ITEM_UNCHANGED has to remain the smallest value, any greater value
 * means "something changed". */
enum {
	ITEM_UNCHANGED = 0,		/* no difference found */
	ITEM_CHANGED_TEXTONLY = 1,	/* only the item text differs */
	ITEM_CHANGED = 2		/* title or author differs */
};
152
153 static gint rssyl_feed_item_changed(FeedItem *new_item, FeedItem *old_item )
154 {
155         debug_print("RSSyl: comparing '%s' and '%s'\n",
156                         new_item->title, old_item->title);
157
158         /* if both have title ... */
159         if( old_item->title && new_item->title ) {
160                 gchar *old = conv_unmime_header(old_item->title, CS_UTF_8, FALSE);
161                 gchar *new = conv_unmime_header(new_item->title, CS_UTF_8, FALSE);
162                 if( strcmp(old, new) != 0 ) { /* ... compare "unmimed" titles */
163                         debug_print("RSSyl:\t\titem titles differ:\nOLD: '%s'\nNEW: '%s'\n",
164                                         old, new);
165                         g_free(old);
166                         g_free(new);
167                         return ITEM_CHANGED;
168                 }
169                 g_free(old);
170                 g_free(new);
171         } else {
172                 /* if atleast one has a title, they differ */
173                 if( old_item->title || new_item->title ) {
174                         debug_print("RSSyl:\t\t+/- title\n");
175                         return ITEM_CHANGED;
176                 }
177         }
178
179         if( old_item->author && new_item->author ) {
180                 gchar *old = conv_unmime_header(old_item->author, CS_UTF_8, TRUE);
181                 gchar *new = conv_unmime_header(new_item->author, CS_UTF_8, TRUE);
182                 if( strcmp(old, new) ) {  /* ... compare "unmimed" authors */
183                         g_free(old);
184                         g_free(new);
185                         debug_print("RSSyl:\t\titem authors differ\n");
186                         return ITEM_CHANGED;
187                 }
188                 g_free(old);
189                 g_free(new);
190         } else {
191                 /* if atleast one has author, they differ */
192                 if( old_item->author || new_item->author ) {
193                         debug_print("RSSyl:\t\t+/- author\n");
194                         return ITEM_CHANGED;
195                 }
196         }
197
198         /* if both have text ... */
199         if( old_item->text && new_item->text ) {
200                 if( strcmp(old_item->text, new_item->text) ) { /* ... compare them */
201                         debug_print("RSSyl:\t\titem texts differ\n");
202                         debug_print("\nOLD: '%s'\n", old_item->text);
203                         debug_print("\nNEW: '%s'\n", new_item->text);
204
205                         return ITEM_CHANGED_TEXTONLY;
206                 }
207         } else {
208                 /* if at least one has some text, they differ */
209                 if( old_item->text || new_item->text ) {
210                         debug_print("RSSyl:\t\t+/- text\n");
211                         return ITEM_CHANGED_TEXTONLY;
212                 }
213         }
214
215         /* they don't seem to differ */
216         return ITEM_UNCHANGED;
217 }
218
/* Result codes telling whether a parsed feed item is already stored
 * locally. The two "changed" codes must stay the largest values, so that
 * a single ">= EXISTS_CHANGED" test covers both. */
enum {
	EXISTS_NEW = 0,			/* item is not known yet */
	EXISTS_UNCHANGED = 1,		/* item is known and identical */
	EXISTS_CHANGED = 2,		/* item is known, but has changed */
	EXISTS_CHANGED_TEXTONLY = 3	/* item is known, only its text changed */
};
225
226 /* rssyl_feed_item_exists()
227  *
228  * Returns 1 if a feed item already exists locally, 2 if there's a changed
229  * item with link that already belongs to existing item, 3 if only item's
230  * text has changed, 0 if item is new.
231  */
232
233 static guint rssyl_feed_item_exists(RFolderItem *ritem, FeedItem *fitem,
234                 FeedItem **oldfitem)
235 {
236         GSList *item = NULL;
237         FeedItem *efitem = NULL;
238         gint changed;
239
240         g_return_val_if_fail(ritem != NULL, FALSE);
241         g_return_val_if_fail(fitem != NULL, FALSE);
242
243         if( ritem->items == NULL || g_slist_length(ritem->items) == 0 )
244                 return EXISTS_NEW;
245
246         if( (item = g_slist_find_custom(ritem->items,
247                                         (gconstpointer)fitem, (GCompareFunc)rssyl_cb_feed_compare)) ) {
248                 efitem = (FeedItem *)item->data;
249                 if( (changed = rssyl_feed_item_changed(fitem, efitem)) > ITEM_UNCHANGED ) {
250                         *oldfitem = efitem;
251                         if (changed == ITEM_CHANGED_TEXTONLY)
252                                 return EXISTS_CHANGED_TEXTONLY;
253                         else
254                                 return EXISTS_CHANGED;
255                 }
256
257                 return EXISTS_UNCHANGED;
258         }
259
260         return EXISTS_NEW;
261 }
262
263 /* =============================================================== */
264
265 void rssyl_add_item(RFolderItem *ritem, FeedItem *feed_item)
266 {
267         FeedItem *old_item = NULL;
268         MsgFlags *flags;
269         MsgPermFlags oldperm_flags = 0;
270         MsgInfo *msginfo;
271         FILE *f;
272         gint fd, d, dif, errno = 0;
273         time_t tmpd;
274         gchar *meta_charset = NULL;
275         gchar *baseurl = NULL;
276         gchar *template = NULL;
277         gchar *tmp = NULL, *tmpurl = NULL, *tmpid = NULL;
278         gchar *dirname = NULL;
279         gchar *text = NULL;
280         gchar *heading = NULL;
281         gchar hdr[1024];
282         FeedItemEnclosure *enc = NULL;
283         RFeedCtx *ctx;
284
285         g_return_if_fail(ritem != NULL);
286
287         /* If item title is empty, try to fill it from source title (Atom only). */
288         tmp = feed_item_get_sourcetitle(feed_item);
289         if( feed_item_get_title(feed_item) == NULL ||
290                         strlen(feed_item->title) == 0 ) {
291                 if( tmp != NULL && strlen(tmp) > 0 )
292                         feed_item_set_title(feed_item, tmp);
293                 else
294                         feed_item_set_title(feed_item, C_("Empty RSS feed title placeholder", "(empty)"));
295         }
296
297 /*
298         if (feed_item_get_id(feed_item) == NULL) {
299                 debug_print("RSSyl: item ID empty, using its URL as ID.\n");
300                 feed_item_set_id(feed_item, feed_item_get_url(feed_item));
301         }
302 */
303
304         /* If neither item date is set, use date from source (Atom only). */
305         if( feed_item_get_date_modified(feed_item) == -1 &&
306                         feed_item_get_date_published(feed_item) == -1 )
307                 feed_item_set_date_published(feed_item,
308                                 feed_item_get_sourcedate(feed_item));
309
310         /* Fix up subject, url and ID (rssyl_format_string()) so that
311          * comparing doesn't break. */
312         debug_print("RSSyl: fixing up subject '%s'\n", feed_item_get_title(feed_item));
313         feed_item_set_title(feed_item, rssyl_format_string(feed_item_get_title(feed_item), TRUE, TRUE));
314         debug_print("RSSyl: fixing up URL\n");
315         feed_item_set_url(feed_item, rssyl_format_string(feed_item_get_url(feed_item),
316                                 TRUE, TRUE));
317         if( feed_item_get_id(feed_item) != NULL ) {
318                 debug_print("RSSyl: fixing up ID\n");
319                 feed_item_set_id(feed_item, rssyl_format_string(feed_item_get_id(feed_item),
320                                         TRUE, TRUE));
321         }
322
323         /* If there's a summary, but no text, use summary as text. */
324         if( feed_item_get_text(feed_item) == NULL &&
325                         (tmp = feed_item_get_summary(feed_item)) != NULL ) {
326                 feed_item_set_text(feed_item, tmp);
327                 g_free(feed_item->summary);     /* We do not need summary in rssyl now. */
328                 feed_item->summary = NULL;
329         }
330
331         /* Do not add if the item already exists, update if it does exist, but
332          * has changed. */
333         dif = rssyl_feed_item_exists(ritem, feed_item, &old_item);
334         debug_print("RSSyl: rssyl_feed_item_exists returned %d\n", dif);
335
336         if( dif == EXISTS_UNCHANGED ) {
337                 debug_print("RSSyl: This item already exists, skipping...\n");
338                 return;
339         }
340
341         /* Item is already in the list, but has changed */
342         if( dif >= EXISTS_CHANGED && old_item != NULL ) {
343                 debug_print("RSSyl: Item changed, removing old one and adding new.\n");
344
345                 /* Store permflags of the old item. */
346                 ctx = (RFeedCtx *)old_item->data;
347                 msginfo = folder_item_get_msginfo((FolderItem *)ritem,
348                                 atoi(g_path_get_basename(ctx->path)));
349                 oldperm_flags = msginfo->flags.perm_flags;
350
351                 ritem->items = g_slist_remove(ritem->items, old_item);
352                 if (g_unlink(ctx->path) != 0) {
353                         debug_print("RSSyl: Error, could not delete file '%s': %s\n",
354                                         ctx->path, g_strerror(errno));
355                 }
356
357                 g_free(ctx->path);
358                 feed_item_free(old_item);
359                 old_item = NULL;
360         }
361
362         /* Check against list of deleted items. */
363         if (rssyl_deleted_check(ritem->deleted_items, feed_item)) {
364                 debug_print("RSSyl: Item '%s' found among deleted items, NOT adding it.\n",
365                                 feed_item_get_title(feed_item));
366                 return;
367         }
368
369         /* Add a new item, formatting its title along the way */
370         debug_print("RSSyl: Adding item '%s'\n", feed_item_get_title(feed_item));
371         ritem->items = g_slist_prepend(ritem->items, feed_item_copy(feed_item));
372
373         dirname = folder_item_get_path(&ritem->item);
374         template = g_strconcat(dirname, G_DIR_SEPARATOR_S,
375                         RSSYL_TMP_TEMPLATE, NULL);
376         fd = mkstemp(template);
377
378         f = fdopen(fd, "w");
379         if(f == NULL) {
380                 g_warning("Couldn't open file '%s', not adding msg!\n", template);
381                 g_free(template);
382                 return;
383         }
384
385         /* From */
386         if( (tmp = feed_item_get_author(feed_item)) != NULL ) {
387                 if( g_utf8_validate(tmp, -1, NULL)) {
388                         conv_encode_header_full(hdr, 1023, tmp, strlen("From: "),
389                                         TRUE, CS_UTF_8);
390                         fprintf(f, "From: %s\n", hdr);
391                 } else
392                         fprintf(f, "From: %s\n", tmp);
393         }
394
395         /* Date */
396         if( (tmpd = feed_item_get_date_modified(feed_item)) != -1 ) {
397                 tmp = createRFC822Date(&tmpd);
398                 debug_print("RSSyl: using date_modified: '%s'\n", tmp);
399         } else if( (tmpd = feed_item_get_date_published(feed_item)) != -1 ) {
400                 tmp = createRFC822Date(&tmpd);
401                 debug_print("RSSyl: using date_published: '%s'\n", tmp);
402         } else {
403                 tmpd = time(NULL);
404                 tmp = createRFC822Date(&tmpd);
405         }
406
407         if( tmp != NULL ) {
408                 fprintf(f, "Date: %s\n", tmp);
409                 g_free(tmp);
410         }
411
412         if( (tmp = feed_item_get_title(feed_item)) != NULL ) {
413
414                 /* (Atom only) Strip HTML markup from title for the Subject line. */
415                 if( feed_item_get_title_format(feed_item) == FEED_ITEM_TITLE_HTML
416                                 || feed_item_get_title_format(feed_item) == FEED_ITEM_TITLE_XHTML) {
417                         debug_print("RSSyl: item title is HTML/XHTML, stripping tags for Subject line\n");
418                         tmp = g_strdup(tmp);
419                         strip_html(tmp);
420                 }
421
422                 if( g_utf8_validate(tmp, -1, NULL) ) {
423                         conv_encode_header_full(hdr, 1023, tmp, strlen("Subject: "),
424                                         FALSE, CS_UTF_8);
425                         debug_print("RSSyl: Subject: %s\n", hdr);
426                         fprintf(f, "Subject: %s\n", hdr);
427                 } else
428                         fprintf(f, "Subject: %s\n", tmp);
429
430                 if( feed_item_get_title_format(feed_item) == FEED_ITEM_TITLE_HTML
431                                 || feed_item_get_title_format(feed_item) == FEED_ITEM_TITLE_XHTML) {
432                         g_free(tmp);
433                         fprintf(f, "X-RSSyl-OrigTitle: %s\n", feed_item_get_title(feed_item));
434                 }
435         } else {
436                 debug_print("RSSyl: No feed title, it seems\n");
437                 fprintf(f, "Subject: (empty)\n");
438         }
439
440         /* X-RSSyl-URL */
441         if( (tmpurl = feed_item_get_url(feed_item)) == NULL ) {
442                 if( feed_item_get_id(feed_item) != NULL &&
443                                 feed_item_id_is_permalink(feed_item) ) {
444                         tmpurl = feed_item_get_id(feed_item);
445                 }
446         }
447
448         if( tmpurl != NULL )
449                 fprintf(f, "X-RSSyl-URL: %s\n", tmpurl);
450
451         if( ritem->last_update > 0) {
452                 fprintf(f, "X-RSSyl-Last-Seen: %ld\n", ritem->last_update);
453         }
454
455         /* Message-ID */
456         if( (tmpid = feed_item_get_id(feed_item)) == NULL )
457                 tmpid = feed_item_get_url(feed_item);
458         if( tmpid != NULL )
459                 fprintf(f, "Message-ID: <%s>\n", tmpid);
460
461         /* X-RSSyl-Comments */
462         if( (text = feed_item_get_comments_url(feed_item)) != NULL )
463                 fprintf(f, "X-RSSyl-Comments: %s\n", text);
464
465         /* References */
466         if( (text = feed_item_get_parent_id(feed_item)) != NULL )
467                 fprintf(f, "References: <%s>\n", text);
468
469         /* Content-Type */
470         text = feed_item_get_text(feed_item);
471         if( text && g_utf8_validate(text, -1, NULL) ) {
472                 fprintf(f, "Content-Type: text/html; charset=UTF-8\n\n");
473                 meta_charset = g_strdup("<meta http-equiv=\"Content-Type\" "
474                                 "content=\"text/html; charset=UTF-8\">");
475         } else {
476                 fprintf(f, "Content-Type: text/html\n\n");
477         }
478
479         /* construct base href */
480         if( feed_item_get_url(feed_item) != NULL )
481                 baseurl = g_strdup_printf("<base href=\"%s\">\n",
482                         feed_item_get_url(feed_item) );
483
484         if( ritem->write_heading )
485                 heading = g_strdup_printf("<h2>%s</h2>\n<br><br>\n",
486                                 feed_item_get_title(feed_item));
487
488         /* Message body */
489         fprintf(f, "<html><head>"
490                         "%s\n"
491                         "%s"
492                         "</head>\n<body>\n"
493                         "%s\n"
494                         "URL: <a href=\"%s\">%s</a>\n\n<br><br>\n"
495                         RSSYL_TEXT_START"\n"
496                         "%s%s"
497                         RSSYL_TEXT_END"\n\n",
498                         (meta_charset ? meta_charset : ""),
499                         (baseurl ? baseurl : ""),
500                         (heading ? heading : ""),
501                         (tmpurl ? tmpurl : ""),
502                         (tmpurl ? tmpurl : "n/a"),
503                         (text ? text : ""), (text ? "\n" : "") );
504
505         g_free(meta_charset);
506         g_free(baseurl);
507         g_free(heading);
508
509         if( (enc = feed_item_get_enclosure(feed_item)) != NULL )
510                 fprintf(f, "<p><a href=\"%s\">Attached media file</a> [%s] (%ld bytes)</p>\n",
511                                 feed_item_enclosure_get_url(enc),
512                                 feed_item_enclosure_get_type(enc),
513                                 feed_item_enclosure_get_size(enc) );
514
515         fprintf(f, "</body></html>\n");
516         fclose(f);
517
518         g_return_if_fail(template != NULL);
519
520         flags = g_new(MsgFlags, 1);
521         flags->perm_flags = MSG_NEW | MSG_UNREAD;
522         flags->tmp_flags = 0;
523
524         d = folder_item_add_msg(&ritem->item, template, flags, TRUE);
525         g_free(template);
526
527         ctx = g_new0(RFeedCtx, 1);
528         ctx->path = (gpointer)g_strdup_printf("%s%c%d", dirname,
529                         G_DIR_SEPARATOR, d);
530         ctx->last_seen = ritem->last_update;
531         ((FeedItem *)ritem->items->data)->data = (gpointer)ctx;
532
533         /* Unset unread+new if the changed item wasn't set unread and user
534          * doesn't want to see it unread because of the change. */
535         if (!(oldperm_flags & MSG_UNREAD) && (ritem->silent_update == 2
536                         || (ritem->silent_update == 1 && dif == EXISTS_CHANGED_TEXTONLY)))
537                 procmsg_msginfo_unset_flags(
538                                 folder_item_get_msginfo((FolderItem *)ritem, d), MSG_NEW | MSG_UNREAD, 0);
539
540         debug_print("RSSyl: folder_item_add_msg(): %d\n", d);
541 }