Rename claws_io to file-utils, and move file-related functions
[claws.git] / src / plugins / rssyl / rssyl_add_item.c
1 /*
2  * Claws Mail -- a GTK+ based, lightweight, and fast e-mail client
3  * Copyright (C) 1999-2004 Hiroyuki Yamamoto
4  * This file (C) 2005 Andrej Kacian <andrej@kacian.sk>
5  *
6  * - Stores fetched feed items as messages in an RSSyl folder, skipping duplicates and replacing items that changed
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21  */
22
23 #ifdef HAVE_CONFIG_H
24 #  include "config.h"
25 #endif
26
/* Global includes */
#include <errno.h>
#include <glib.h>
#include <glib/gi18n.h>
#include <string.h>
#include <unistd.h>
32
33 /* Claws Mail includes */
34 #include <codeconv.h>
35 #include <procmsg.h>
36 #include <common/utils.h>
37 #include <file-utils.h>
38
39 /* Local includes */
40 #include "libfeed/date.h"
41 #include "libfeed/feeditem.h"
42 #include "parse822.h"
43 #include "rssyl.h"
44 #include "rssyl_deleted.h"
45 #include "rssyl_feed.h"
46 #include "rssyl_parse_feed.h"
47 #include "strutils.h"
48
49 /* rssyl_cb_feed_compare()
50  *
51  * GCompareFunc function called by glib2's g_slist_find_custom().
52  */
53
54 static gint rssyl_cb_feed_compare(const FeedItem *a, const FeedItem *b)
55 {
56         gboolean date_eq = FALSE, url_eq = FALSE, title_eq = FALSE;
57         gboolean pubdate_eq = FALSE, moddate_eq = FALSE;
58         gboolean no_url = FALSE, no_date = FALSE, no_title = FALSE;
59         gboolean no_pubdate = FALSE, no_moddate = FALSE;
60         gchar *atit = NULL, *btit = NULL;
61
62         g_return_val_if_fail(a != NULL && b != NULL, 1);
63
64         /* ID should be unique. If it matches, we've found what we came for. */
65         if( (a->id != NULL) && (b->id != NULL) ) {
66                         if( !strcmp(a->id, b->id) ) {
67                                 return 0;
68                         }
69
70                         /* If both IDs are present, but they do not match, these are not the
71                          * droids we're looking for. */
72                         return 1;
73         }
74
75         /* Ok, we have no ID to aid us. Let's have a look at item timestamps
76          * and item title & url. */
77         if( (a->url != NULL) && (b->url != NULL) ) {
78                 if( !strcmp(a->url, b->url) )
79                         url_eq = TRUE;
80         } else
81                 no_url = TRUE;
82
83         /* Now we prepare some boolean flags to help us express comparing choices
84          * later on. */
85
86         /* Title */
87         if( (a->title != NULL) && (b->title != NULL) ) {
88                 atit = conv_unmime_header(a->title, CS_UTF_8, FALSE);
89                 btit = conv_unmime_header(b->title, CS_UTF_8, FALSE);
90                 if( !strcmp(atit, btit) )
91                         title_eq = TRUE;
92                 g_free(atit);
93                 g_free(btit);
94         } else
95                 no_title = TRUE;
96
97         /* Published date */
98         if (b->date_published <= 0) {
99                 no_pubdate = TRUE;
100         } else {
101                 if (a->date_published == b->date_published)
102                         pubdate_eq = TRUE;
103         }
104
105         /* Modified date */
106         if (b->date_modified <= 0) {
107                 no_moddate = TRUE;
108         } else {
109                 if (a->date_modified == b->date_modified)
110                         moddate_eq = TRUE;
111         }
112
113         if (no_pubdate && no_moddate)
114                 no_date = TRUE;
115
116         if (pubdate_eq || (no_pubdate && moddate_eq))
117                 date_eq = TRUE;
118
119         /* If timestamp and url match, it is reasonable to assume
120          * we found our item. */
121         if (url_eq && date_eq)
122                 return 0;
123
124         /* Likewise if timestamp and title match. */
125         if (title_eq && date_eq)
126                 return 0;
127
128         /* There is no timestamp and the url matches (or there is none),
129          * we need to compare titles, ... */
130         if( (no_url || url_eq) && no_date ) {
131                 if( title_eq )
132                         return 0;
133                 else
134                         return 1;
135         }
136
137         /* ... and as a last resort, if there is no title, item texts. */
138         if( no_title && a->text && b->text ) {
139                 if( !strcmp(a->text, b->text) )
140                         return 0;
141                 else
142                         return 1;
143         }
144
145         /* We don't know this item. */
146         return 1;
147 }
148
/* Result codes of rssyl_feed_item_changed(). */
enum {
	ITEM_UNCHANGED        = 0,	/* no difference found */
	ITEM_CHANGED_TEXTONLY = 1,	/* only the item text differs */
	ITEM_CHANGED          = 2	/* title or author differs */
};
154
155 static gint rssyl_feed_item_changed(FeedItem *new_item, FeedItem *old_item )
156 {
157         debug_print("RSSyl: comparing '%s' and '%s'\n",
158                         new_item->title, old_item->title);
159
160         /* if both have title ... */
161         if( old_item->title && new_item->title ) {
162                 gchar *old = conv_unmime_header(old_item->title, CS_UTF_8, FALSE);
163                 gchar *new = conv_unmime_header(new_item->title, CS_UTF_8, FALSE);
164                 if( strcmp(old, new) != 0 ) { /* ... compare "unmimed" titles */
165                         debug_print("RSSyl:\t\titem titles differ:\nOLD: '%s'\nNEW: '%s'\n",
166                                         old, new);
167                         g_free(old);
168                         g_free(new);
169                         return ITEM_CHANGED;
170                 }
171                 g_free(old);
172                 g_free(new);
173         } else {
174                 /* if atleast one has a title, they differ */
175                 if( old_item->title || new_item->title ) {
176                         debug_print("RSSyl:\t\t+/- title\n");
177                         return ITEM_CHANGED;
178                 }
179         }
180
181         if( old_item->author && new_item->author ) {
182                 gchar *old = conv_unmime_header(old_item->author, CS_UTF_8, TRUE);
183                 gchar *new = conv_unmime_header(new_item->author, CS_UTF_8, TRUE);
184                 if( strcmp(old, new) ) {  /* ... compare "unmimed" authors */
185                         g_free(old);
186                         g_free(new);
187                         debug_print("RSSyl:\t\titem authors differ\n");
188                         return ITEM_CHANGED;
189                 }
190                 g_free(old);
191                 g_free(new);
192         } else {
193                 /* if atleast one has author, they differ */
194                 if( old_item->author || new_item->author ) {
195                         debug_print("RSSyl:\t\t+/- author\n");
196                         return ITEM_CHANGED;
197                 }
198         }
199
200         /* if both have text ... */
201         if( old_item->text && new_item->text ) {
202                 if( strcmp(old_item->text, new_item->text) ) { /* ... compare them */
203                         debug_print("RSSyl:\t\titem texts differ\n");
204                         debug_print("\nOLD: '%s'\n", old_item->text);
205                         debug_print("\nNEW: '%s'\n", new_item->text);
206
207                         return ITEM_CHANGED_TEXTONLY;
208                 }
209         } else {
210                 /* if at least one has some text, they differ */
211                 if( old_item->text || new_item->text ) {
212                         debug_print("RSSyl:\t\t+/- text\n");
213                         return ITEM_CHANGED_TEXTONLY;
214                 }
215         }
216
217         /* they don't seem to differ */
218         return ITEM_UNCHANGED;
219 }
220
/* Result codes of rssyl_feed_item_exists(). */
enum {
	EXISTS_NEW              = 0,	/* item is not stored yet */
	EXISTS_UNCHANGED        = 1,	/* item is stored and identical */
	EXISTS_CHANGED          = 2,	/* item is stored, but has changed */
	EXISTS_CHANGED_TEXTONLY = 3	/* item is stored, only its text changed */
};
227
228 /* rssyl_feed_item_exists()
229  *
230  * Returns 1 if a feed item already exists locally, 2 if there's a changed
231  * item with link that already belongs to existing item, 3 if only item's
232  * text has changed, 0 if item is new.
233  */
234
235 static guint rssyl_feed_item_exists(RFolderItem *ritem, FeedItem *fitem,
236                 FeedItem **oldfitem)
237 {
238         GSList *item = NULL;
239         FeedItem *efitem = NULL;
240         gint changed;
241
242         g_return_val_if_fail(ritem != NULL, FALSE);
243         g_return_val_if_fail(fitem != NULL, FALSE);
244
245         if( ritem->items == NULL || g_slist_length(ritem->items) == 0 )
246                 return EXISTS_NEW;
247
248         if( (item = g_slist_find_custom(ritem->items,
249                                         (gconstpointer)fitem, (GCompareFunc)rssyl_cb_feed_compare)) ) {
250                 efitem = (FeedItem *)item->data;
251                 if( (changed = rssyl_feed_item_changed(fitem, efitem)) > ITEM_UNCHANGED ) {
252                         *oldfitem = efitem;
253                         if (changed == ITEM_CHANGED_TEXTONLY)
254                                 return EXISTS_CHANGED_TEXTONLY;
255                         else
256                                 return EXISTS_CHANGED;
257                 }
258
259                 return EXISTS_UNCHANGED;
260         }
261
262         return EXISTS_NEW;
263 }
264
265 /* =============================================================== */
266
267 void rssyl_add_item(RFolderItem *ritem, FeedItem *feed_item)
268 {
269         FeedItem *old_item = NULL;
270         MsgFlags *flags;
271         MsgPermFlags oldperm_flags = 0;
272         MsgInfo *msginfo;
273         FILE *f;
274         gint fd, d, dif;
275         time_t tmpd;
276         gchar *meta_charset = NULL;
277         gchar *baseurl = NULL;
278         gchar *template = NULL;
279         gchar *tmp = NULL, *tmpurl = NULL, *tmpid = NULL;
280         gchar *dirname = NULL;
281         gchar *text = NULL;
282         gchar *heading = NULL;
283         gchar *pathbasename = NULL;
284         gchar hdr[1024];
285         FeedItemEnclosure *enc = NULL;
286         RFeedCtx *ctx;
287
288         g_return_if_fail(ritem != NULL);
289
290         /* If item title is empty, try to fill it from source title (Atom only). */
291         tmp = feed_item_get_sourcetitle(feed_item);
292         if( feed_item_get_title(feed_item) == NULL ||
293                         strlen(feed_item->title) == 0 ) {
294                 if( tmp != NULL && strlen(tmp) > 0 )
295                         feed_item_set_title(feed_item, tmp);
296                 else
297                         feed_item_set_title(feed_item, C_("Empty RSS feed title placeholder", "(empty)"));
298         }
299
300 /*
301         if (feed_item_get_id(feed_item) == NULL) {
302                 debug_print("RSSyl: item ID empty, using its URL as ID.\n");
303                 feed_item_set_id(feed_item, feed_item_get_url(feed_item));
304         }
305 */
306
307         /* If one of the timestamps is empty, set it to value of the other one. */
308         if( feed_item_get_date_modified(feed_item) == -1 &&
309                         feed_item_get_date_published(feed_item) >= 0 ) {
310                 debug_print("RSSyl: setting missing moddate to pubdate %ld\n",
311                                 feed_item_get_date_published(feed_item));
312                 feed_item_set_date_modified(feed_item,
313                                 feed_item_get_date_published(feed_item));
314         } else if( feed_item_get_date_published(feed_item) == -1 &&
315                         feed_item_get_date_modified(feed_item) >= 0 ) {
316                 debug_print("RSSyl: setting missing pubdate to modddate %ld\n",
317                                 feed_item_get_date_modified(feed_item));
318                 feed_item_set_date_published(feed_item,
319                                 feed_item_get_date_modified(feed_item));
320         } else if( feed_item_get_date_modified(feed_item) == -1 &&
321                         feed_item_get_date_published(feed_item) == -1 &&
322                         feed_item_get_sourcedate(feed_item) >= 0 ) {
323                 /* If neither item date is set, use date from source (Atom only). */
324                 debug_print("RSSyl: setting missing pubdate and moddate to feed source date %ld\n",
325                                 feed_item_get_sourcedate(feed_item));
326                 feed_item_set_date_modified(feed_item,
327                                 feed_item_get_sourcedate(feed_item));
328                 feed_item_set_date_published(feed_item,
329                                 feed_item_get_sourcedate(feed_item));
330         }
331
332         /* Fix up subject, url and ID (rssyl_format_string()) so that
333          * comparing doesn't break. */
334         debug_print("RSSyl: fixing up subject '%s'\n", feed_item_get_title(feed_item));
335         tmp = rssyl_format_string(feed_item_get_title(feed_item), TRUE, TRUE);
336         feed_item_set_title(feed_item, tmp);
337         g_free(tmp);
338         debug_print("RSSyl: fixing up URL\n");
339         tmp = rssyl_format_string(feed_item_get_url(feed_item), FALSE, TRUE);
340         feed_item_set_url(feed_item, tmp);
341         g_free(tmp);
342         if( feed_item_get_id(feed_item) != NULL ) {
343                 debug_print("RSSyl: fixing up ID\n");
344                 tmp = rssyl_format_string(feed_item_get_id(feed_item), FALSE, TRUE);
345                 feed_item_set_id(feed_item, tmp);
346                 g_free(tmp);
347         }
348
349         /* If there's a summary, but no text, use summary as text. */
350         if( feed_item_get_text(feed_item) == NULL &&
351                         (tmp = feed_item_get_summary(feed_item)) != NULL ) {
352                 feed_item_set_text(feed_item, tmp);
353                 g_free(feed_item->summary);     /* We do not need summary in rssyl now. */
354                 feed_item->summary = NULL;
355         }
356
357         /* Do not add if the item already exists, update if it does exist, but
358          * has changed. */
359         dif = rssyl_feed_item_exists(ritem, feed_item, &old_item);
360         debug_print("RSSyl: rssyl_feed_item_exists returned %d\n", dif);
361
362         if( dif == EXISTS_UNCHANGED ) {
363                 debug_print("RSSyl: This item already exists, skipping...\n");
364                 return;
365         }
366
367         /* Item is already in the list, but has changed */
368         if( dif >= EXISTS_CHANGED && old_item != NULL ) {
369                 debug_print("RSSyl: Item changed, removing old one and adding new.\n");
370
371                 /* Store permflags of the old item. */
372                 ctx = (RFeedCtx *)old_item->data;
373                 pathbasename = g_path_get_basename(ctx->path);
374                 msginfo = folder_item_get_msginfo((FolderItem *)ritem,
375                                                 atoi(pathbasename));
376                 g_free(pathbasename);
377                 oldperm_flags = msginfo->flags.perm_flags;
378
379                 ritem->items = g_slist_remove(ritem->items, old_item);
380                 if (g_unlink(ctx->path) != 0) {
381                         debug_print("RSSyl: Error, could not delete file '%s': %s\n",
382                                         ctx->path, g_strerror(errno));
383                 }
384
385                 g_free(ctx->path);
386                 feed_item_free(old_item);
387                 old_item = NULL;
388         }
389
390         /* Check against list of deleted items. */
391         if (rssyl_deleted_check(ritem->deleted_items, feed_item)) {
392                 debug_print("RSSyl: Item '%s' found among deleted items, NOT adding it.\n",
393                                 feed_item_get_title(feed_item));
394                 return;
395         }
396
397         /* Add a new item, formatting its title along the way */
398         debug_print("RSSyl: Adding item '%s'\n", feed_item_get_title(feed_item));
399         ritem->items = g_slist_prepend(ritem->items, feed_item_copy(feed_item));
400
401         dirname = folder_item_get_path(&ritem->item);
402         template = g_strconcat(dirname, G_DIR_SEPARATOR_S,
403                         RSSYL_TMP_TEMPLATE, NULL);
404         if ((fd = g_mkstemp(template)) < 0) {
405                 g_warning("Couldn't g_mkstemp('%s'), not adding message!", template);
406                 g_free(dirname);
407                 g_free(template);
408                 return;
409         }
410
411         f = claws_fdopen(fd, "w");
412         if (f == NULL) {
413                 g_warning("Couldn't open file '%s', not adding message!", template);
414                 g_free(dirname);
415                 g_free(template);
416                 return;
417         }
418
419         /* From */
420         if( (tmp = feed_item_get_author(feed_item)) != NULL ) {
421                 if( g_utf8_validate(tmp, -1, NULL)) {
422                         conv_encode_header_full(hdr, 1023, tmp, strlen("From: "),
423                                         TRUE, CS_UTF_8);
424                         fprintf(f, "From: %s\n", hdr);
425                 } else
426                         fprintf(f, "From: %s\n", tmp);
427         }
428
429         /* Date */
430         if( (tmpd = feed_item_get_date_modified(feed_item)) != -1 ) {
431                 tmp = createRFC822Date(&tmpd);
432                 debug_print("RSSyl: using date_modified: '%s'\n", tmp);
433         } else if( (tmpd = feed_item_get_date_published(feed_item)) != -1 ) {
434                 tmp = createRFC822Date(&tmpd);
435                 debug_print("RSSyl: using date_published: '%s'\n", tmp);
436         } else {
437                 tmpd = time(NULL);
438                 tmp = createRFC822Date(&tmpd);
439         }
440
441         if( tmp != NULL ) {
442                 fprintf(f, "Date: %s\n", tmp);
443                 g_free(tmp);
444         }
445
446         if( (tmp = feed_item_get_title(feed_item)) != NULL ) {
447
448                 /* (Atom only) Strip HTML markup from title for the Subject line. */
449                 if( feed_item_get_title_format(feed_item) == FEED_ITEM_TITLE_HTML
450                                 || feed_item_get_title_format(feed_item) == FEED_ITEM_TITLE_XHTML) {
451                         debug_print("RSSyl: item title is HTML/XHTML, stripping tags for Subject line\n");
452                         tmp = g_strdup(tmp);
453                         strip_html(tmp);
454                 }
455
456                 if( g_utf8_validate(tmp, -1, NULL) ) {
457                         conv_encode_header_full(hdr, 1023, tmp, strlen("Subject: "),
458                                         FALSE, CS_UTF_8);
459                         debug_print("RSSyl: Subject: %s\n", hdr);
460                         fprintf(f, "Subject: %s\n", hdr);
461                 } else
462                         fprintf(f, "Subject: %s\n", tmp);
463
464                 if( feed_item_get_title_format(feed_item) == FEED_ITEM_TITLE_HTML
465                                 || feed_item_get_title_format(feed_item) == FEED_ITEM_TITLE_XHTML) {
466                         g_free(tmp);
467                         fprintf(f, "X-RSSyl-OrigTitle: %s\n", feed_item_get_title(feed_item));
468                 }
469         } else {
470                 debug_print("RSSyl: No feed title, it seems\n");
471                 fprintf(f, "Subject: (empty)\n");
472         }
473
474         /* X-RSSyl-URL */
475         if( (tmpurl = feed_item_get_url(feed_item)) == NULL ) {
476                 if( feed_item_get_id(feed_item) != NULL &&
477                                 feed_item_id_is_permalink(feed_item) ) {
478                         tmpurl = feed_item_get_id(feed_item);
479                 }
480         }
481
482         if( tmpurl != NULL )
483                 fprintf(f, "X-RSSyl-URL: %s\n", tmpurl);
484
485         if( ritem->last_update > 0) {
486                 fprintf(f, "X-RSSyl-Last-Seen: %lld\n", (long long)ritem->last_update);
487         }
488
489         /* Message-ID */
490         if( (tmpid = feed_item_get_id(feed_item)) == NULL )
491                 tmpid = feed_item_get_url(feed_item);
492         if( tmpid != NULL )
493                 fprintf(f, "Message-ID: <%s>\n", tmpid);
494
495         /* X-RSSyl-Comments */
496         if( (text = feed_item_get_comments_url(feed_item)) != NULL )
497                 fprintf(f, "X-RSSyl-Comments: %s\n", text);
498
499         /* References */
500         if( (text = feed_item_get_parent_id(feed_item)) != NULL )
501                 fprintf(f, "References: <%s>\n", text);
502
503         /* Content-Type */
504         text = feed_item_get_text(feed_item);
505         if( text && g_utf8_validate(text, -1, NULL) ) {
506                 fprintf(f, "Content-Type: text/html; charset=UTF-8\n\n");
507                 meta_charset = g_strdup("<meta http-equiv=\"Content-Type\" "
508                                 "content=\"text/html; charset=UTF-8\">");
509         } else {
510                 fprintf(f, "Content-Type: text/html\n\n");
511         }
512
513         /* construct base href */
514         if( feed_item_get_url(feed_item) != NULL )
515                 baseurl = g_strdup_printf("<base href=\"%s\">\n",
516                         feed_item_get_url(feed_item) );
517
518         if( ritem->write_heading )
519                 heading = g_strdup_printf("<h2>%s</h2>\n<br><br>\n",
520                                 feed_item_get_title(feed_item));
521
522         /* Message body */
523         fprintf(f, "<html><head>"
524                         "%s\n"
525                         "%s"
526                         "</head>\n<body>\n"
527                         "%s\n"
528                         "URL: <a href=\"%s\">%s</a>\n\n<br><br>\n"
529                         RSSYL_TEXT_START"\n"
530                         "%s%s"
531                         RSSYL_TEXT_END"\n\n",
532                         (meta_charset ? meta_charset : ""),
533                         (baseurl ? baseurl : ""),
534                         (heading ? heading : ""),
535                         (tmpurl ? tmpurl : ""),
536                         (tmpurl ? tmpurl : "n/a"),
537                         (text ? text : ""), (text ? "\n" : "") );
538
539         g_free(meta_charset);
540         g_free(baseurl);
541         g_free(heading);
542
543         if( (enc = feed_item_get_enclosure(feed_item)) != NULL )
544                 fprintf(f, "<p><a href=\"%s\">Attached media file</a> [%s] (%ld bytes)</p>\n",
545                                 feed_item_enclosure_get_url(enc),
546                                 feed_item_enclosure_get_type(enc),
547                                 feed_item_enclosure_get_size(enc) );
548
549         fprintf(f, "</body></html>\n");
550         claws_safe_fclose(f);
551
552         g_return_if_fail(template != NULL);
553
554         flags = g_new(MsgFlags, 1);
555         flags->perm_flags = MSG_NEW | MSG_UNREAD;
556         flags->tmp_flags = 0;
557
558         d = folder_item_add_msg(&ritem->item, template, flags, TRUE);
559         g_free(template);
560         g_free(flags);
561
562         ctx = g_new0(RFeedCtx, 1);
563         ctx->path = (gpointer)g_strdup_printf("%s%c%d", dirname,
564                         G_DIR_SEPARATOR, d);
565         ctx->last_seen = ritem->last_update;
566         ((FeedItem *)ritem->items->data)->data = (gpointer)ctx;
567
568         g_free(dirname);
569
570         /* Unset unread+new if the changed item wasn't set unread and user
571          * doesn't want to see it unread because of the change. */
572         if (dif != EXISTS_NEW) {
573                 if (!(oldperm_flags & MSG_UNREAD) && (ritem->silent_update == 2
574                                 || (ritem->silent_update == 1 && dif == EXISTS_CHANGED_TEXTONLY)))
575                         procmsg_msginfo_unset_flags(
576                                         folder_item_get_msginfo((FolderItem *)ritem, d), MSG_NEW | MSG_UNREAD, 0);
577         }
578
579         debug_print("RSSyl: folder_item_add_msg(): %d\n", d);
580 }