2006-11-09 [colin] 2.6.0cvs23
[claws.git] / src / plugins / bogofilter / bogofilter.c
1 /*
2  * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3  * Copyright (C) 1999-2006 Hiroyuki Yamamoto and the Claws Mail Team
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18  */
19
20 #ifdef HAVE_CONFIG_H
21 #  include "config.h"
22 #endif
23
24 #include "defs.h"
25
26 #include <sys/types.h>
27 #include <sys/wait.h>
28 #include <errno.h>
29
30 #include <glib.h>
31 #include <glib/gi18n.h>
32
33 #if HAVE_LOCALE_H
34 #  include <locale.h>
35 #endif
36
37 #include "common/sylpheed.h"
38 #include "common/version.h"
39 #include "plugin.h"
40 #include "common/utils.h"
41 #include "hooks.h"
42 #include "procmsg.h"
43 #include "folder.h"
44 #include "prefs.h"
45 #include "prefs_gtk.h"
46
47 #include "bogofilter.h"
48 #include "inc.h"
49 #include "log.h"
50 #include "prefs_common.h"
51 #include "alertpanel.h"
52
53 #ifdef HAVE_SYSEXITS_H
54 #include <sysexits.h>
55 #endif
56 #ifdef HAVE_ERRNO_H
57 #include <errno.h>
58 #endif
59 #ifdef HAVE_SYS_ERRNO_H
60 #include <sys/errno.h>
61 #endif
62 #ifdef HAVE_TIME_H
63 #include <time.h>
64 #endif
65 #ifdef HAVE_SYS_TIME_H
66 #include <sys/time.h>
67 #endif
68 #ifdef HAVE_SIGNAL_H
69 #include <signal.h>
70 #endif
71 #ifdef HAVE_PWD_H
72 #include <pwd.h>
73 #endif
74
75 #define MAILS_PER_BATCH 20
76
77 static guint hook_id = -1;
78 static MessageCallback message_callback;
79
80 static BogofilterConfig config;
81
82 static PrefParam param[] = {
83         {"process_emails", "TRUE", &config.process_emails, P_BOOL,
84          NULL, NULL, NULL},
85         {"receive_spam", "TRUE", &config.receive_spam, P_BOOL,
86          NULL, NULL, NULL},
87         {"save_folder", NULL, &config.save_folder, P_STRING,
88          NULL, NULL, NULL},
89         {"max_size", "250", &config.max_size, P_INT,
90          NULL, NULL, NULL},
91         {"bogopath", "bogofilter", &config.bogopath, P_STRING,
92          NULL, NULL, NULL},
93
94         {NULL, NULL, NULL, P_OTHER, NULL, NULL, NULL}
95 };
96
97 /*
98  * Helper function for spawn_with_input() - write an entire
99  * string to a fd.
100  */
101 static gboolean
102 write_all (int         fd,
103            const char *buf,
104            gsize       to_write)
105 {
106   while (to_write > 0)
107     {
108       gssize count = write (fd, buf, to_write);
109       if (count < 0)
110         {
111           if (errno != EINTR)
112             return FALSE;
113         }
114       else
115         {
116           to_write -= count;
117           buf += count;
118         }
119     }
120
121   return TRUE;
122 }
123
124 static gboolean mail_filtering_hook(gpointer source, gpointer data)
125 {
126         MailFilteringData *mail_filtering_data = (MailFilteringData *) source;
127         MsgInfo *msginfo = mail_filtering_data->msginfo;
128         GSList *msglist = mail_filtering_data->msglist;
129         GSList *cur = NULL;
130         static gboolean warned_error = FALSE;
131         gchar *file = NULL;
132         int status = 0;
133         gchar *bogo_exec = (config.bogopath && *config.bogopath) ? config.bogopath:"bogofilter";
134         int total = 0, curnum = 0;
135         GSList *spams = NULL, *new_hams = NULL, *new_spams = NULL;
136         gchar buf[BUFSIZ];
137
138         gchar *bogo_args[4];
139         GPid bogo_pid;
140         gint bogo_stdin, bogo_stdout;
141         GError *error = NULL;
142         gboolean bogo_forked;
143
144         if (!config.process_emails) {
145                 return FALSE;
146         }
147         
148         if (msglist == NULL && msginfo != NULL) {
149                 g_warning("wrong call to bogofilter mail_filtering_hook");
150                 return FALSE;
151         }
152         
153         total = g_slist_length(msglist);
154         if (message_callback != NULL)
155                 message_callback(_("Bogofilter: filtering messages..."), total, 0);
156
157         /* determine spam status - should be threaded */
158         bogo_args[0] = bogo_exec;
159         bogo_args[1] = "-T";
160         bogo_args[2] = "-b";
161         bogo_args[3] = NULL;
162
163         bogo_forked = g_spawn_async_with_pipes(
164                         NULL, bogo_args,NULL, G_SPAWN_SEARCH_PATH|G_SPAWN_DO_NOT_REAP_CHILD,
165                         NULL, NULL, &bogo_pid, &bogo_stdin,
166                         &bogo_stdout, NULL, &error);
167                 
168         if (bogo_forked == FALSE) {
169                 g_warning("%s\n", error ? error->message:"ERROR???");
170                 g_error_free(error);
171                 error = NULL;
172                 status = -1;
173         } else {
174                 for (cur = msglist; cur; cur = cur->next) {
175                         msginfo = (MsgInfo *)cur->data;
176                         debug_print("Filtering message %d (%d/%d)\n", msginfo->msgnum, curnum, total);
177
178                         if (message_callback != NULL)
179                                 message_callback(NULL, total, curnum++);
180
181                         /* can set flags (SCANNED, ATTACHMENT) but that's ok 
182                          * as GUI updates are hooked not direct */
183                         file = procmsg_get_message_file(msginfo);
184
185                         if (file) {
186                                 gchar *tmp = g_strdup_printf("%s\n",file);
187                                 write_all(bogo_stdin, tmp, strlen(tmp));
188                                 g_free(tmp);
189                                 memset(buf, 0, sizeof(buf));
190                                 if (read(bogo_stdout, buf, sizeof(buf)-1) < 0) {
191                                         g_warning("bogofilter short read\n");
192                                         debug_print("message %d is ham\n", msginfo->msgnum);
193                                         mail_filtering_data->unfiltered = g_slist_prepend(
194                                                 mail_filtering_data->unfiltered, msginfo);
195                                         new_hams = g_slist_prepend(new_hams, msginfo);
196                                 } else {
197                                         gchar **parts = NULL;
198                                         if (strchr(buf, '/')) {
199                                                 tmp = strrchr(buf, '/')+1;
200                                         } else {
201                                                 tmp = buf;
202                                         }
203                                         parts = g_strsplit(tmp, " ", 0);
204                                         debug_print("read %s\n", buf);
205                                         if (parts && parts[0] && parts[1] && *parts[1] == 'S') {
206                                                 debug_print("message %d is spam\n", msginfo->msgnum);
207                                                 if (config.receive_spam) {
208                                                         spams = g_slist_prepend(spams, msginfo);
209                                                 } 
210
211                                                 mail_filtering_data->filtered = g_slist_prepend(
212                                                         mail_filtering_data->filtered, msginfo);
213                                                 new_spams = g_slist_prepend(new_spams, msginfo);
214                                         } else {
215                                                 debug_print("message %d is ham\n", msginfo->msgnum);
216                                                 mail_filtering_data->unfiltered = g_slist_prepend(
217                                                         mail_filtering_data->unfiltered, msginfo);
218                                                 new_hams = g_slist_prepend(new_hams, msginfo);
219                                         }
220                                         g_strfreev(parts);
221                                 }
222                                 g_free(file);
223                         } else {
224                                 mail_filtering_data->unfiltered = g_slist_prepend(
225                                         mail_filtering_data->unfiltered, msginfo);
226                                 new_hams = g_slist_prepend(new_hams, msginfo);
227                         }
228                 }
229         }
230         
231         if (status != -1) {
232                 close(bogo_stdout);
233                 close(bogo_stdin);
234                 waitpid(bogo_pid, &status, 0);
235                 if (!WIFEXITED(status))
236                         status = -1;
237                 else
238                         status = WEXITSTATUS(status);
239         } 
240         /* end of thread */
241
242         /* flag hams */
243         for (cur = new_hams; cur; cur = cur->next) {
244                 MsgInfo *msginfo = (MsgInfo *)cur->data;
245                 procmsg_msginfo_unset_flags(msginfo, MSG_SPAM, 0);
246         }
247         g_slist_free(new_hams);
248         /* flag spams */
249         for (cur = new_spams; cur; cur = cur->next) {
250                 MsgInfo *msginfo = (MsgInfo *)cur->data;
251                 if (config.receive_spam) {
252                         procmsg_msginfo_change_flags(msginfo, MSG_SPAM, 0, ~0, 0);
253                 } else {
254                         folder_item_remove_msg(msginfo->folder, msginfo->msgnum);
255                 }
256         }
257         g_slist_free(new_spams);
258         
259         if (status < 0 || status > 2) { /* I/O or other errors */
260                 gchar *msg = NULL;
261                 
262                 if (status == 3)
263                         msg =  g_strdup_printf(_("The Bogofilter plugin couldn't filter "
264                                            "a message. The probable cause of the "
265                                            "error is that it didn't learn from any mail.\n"
266                                            "Use \"/Mark/Mark as spam\" and \"/Mark/Mark as "
267                                            "ham\" to train Bogofilter with a few hundred "
268                                            "spam and ham messages."));
269                 else
270                         msg =  g_strdup_printf(_("The Bogofilter plugin couldn't filter "
271                                            "a message. the command `%s %s %s` couldn't be run."), 
272                                            bogo_args[0], bogo_args[1], bogo_args[2]);
273                 if (!prefs_common.no_recv_err_panel) {
274                         if (!warned_error) {
275                                 alertpanel_error(msg);
276                         }
277                         warned_error = TRUE;
278                 } else {
279                         gchar *tmp = g_strdup_printf("%s\n", msg);
280                         log_error(tmp);
281                         g_free(tmp);
282                 }
283                 g_free(msg);
284         }
285         if (status < 0 || status > 2) {
286                 g_slist_free(mail_filtering_data->filtered);
287                 g_slist_free(mail_filtering_data->unfiltered);
288                 g_slist_free(spams);
289                 mail_filtering_data->filtered = NULL;
290                 mail_filtering_data->unfiltered = NULL;
291         } else if (config.receive_spam && spams) {
292                 FolderItem *save_folder;
293
294                 if ((!config.save_folder) ||
295                     (config.save_folder[0] == '\0') ||
296                     ((save_folder = folder_find_item_from_identifier(config.save_folder)) == NULL))
297                         save_folder = folder_get_default_trash();
298                 if (save_folder) {
299                         for (cur = spams; cur; cur = cur->next) {
300                                 msginfo = (MsgInfo *)cur->data;
301                                 msginfo->is_move = TRUE;
302                                 msginfo->to_filter_folder = save_folder;
303                         }
304                 }
305         } 
306
307         if (message_callback != NULL)
308                 message_callback(NULL, 0, 0);
309         mail_filtering_data->filtered = g_slist_reverse(
310                 mail_filtering_data->filtered);
311         mail_filtering_data->unfiltered = g_slist_reverse(
312                 mail_filtering_data->unfiltered);
313         
314         return FALSE;
315 }
316
317 BogofilterConfig *bogofilter_get_config(void)
318 {
319         return &config;
320 }
321
322 int bogofilter_learn(MsgInfo *msginfo, GSList *msglist, gboolean spam)
323 {
324         gchar *cmd = NULL;
325         gchar *file = NULL;
326         const gchar *bogo_exec = (config.bogopath && *config.bogopath) ? config.bogopath:"bogofilter";
327         gint status = 0;
328         if (msginfo == NULL && msglist == NULL) {
329                 return -1;
330         }
331
332         if (msginfo) {
333                 file = procmsg_get_message_file(msginfo);
334                 if (file == NULL) {
335                         return -1;
336                 } else {
337                         if (message_callback != NULL)
338                                 message_callback(_("Bogofilter: learning from message..."), 0, 0);
339                         if (spam)
340                                 /* learn as spam */
341                                 cmd = g_strdup_printf("%s -s -I '%s'", bogo_exec, file);
342                         else if (MSG_IS_SPAM(msginfo->flags))
343                                 /* correct bogofilter, this wasn't spam */
344                                 cmd = g_strdup_printf("%s -Sn -I '%s'", bogo_exec, file);
345                         else 
346                                 /* learn as ham */
347                                 cmd = g_strdup_printf("%s -n -I '%s'", bogo_exec, file);
348                         if ((status = execute_command_line(cmd, FALSE)) != 0)
349                                 log_error(_("Learning failed; `%s` returned with status %d."),
350                                                 cmd, status);
351                         g_free(cmd);
352                         g_free(file);
353                         if (message_callback != NULL)
354                                 message_callback(NULL, 0, 0);
355                         return 0;
356                 }
357         }
358         if (msglist) {
359                 GSList *cur = msglist;
360                 MsgInfo *info;
361                 int total = g_slist_length(msglist);
362                 int done = 0;
363                 gboolean some_correction = FALSE, some_no_correction = FALSE;
364         
365                 if (message_callback != NULL)
366                         message_callback(_("Bogofilter: learning from messages..."), total, 0);
367                 
368                 for (cur = msglist; cur && status == 0; cur = cur->next) {
369                         info = (MsgInfo *)cur->data;
370                         if (spam)
371                                 some_no_correction = TRUE;
372                         else if (MSG_IS_SPAM(info->flags))
373                                 /* correct bogofilter, this wasn't spam */
374                                 some_correction = TRUE;
375                         else 
376                                 some_no_correction = TRUE;
377                         
378                 }
379                 
380                 if (some_correction && some_no_correction) {
381                         /* we potentially have to do different stuff for every mail */
382                         for (cur = msglist; cur && status == 0; cur = cur->next) {
383                                 info = (MsgInfo *)cur->data;
384                                 file = procmsg_get_message_file(info);
385
386                                 if (spam)
387                                         /* learn as spam */
388                                         cmd = g_strdup_printf("%s -s -I '%s'", bogo_exec, file);
389                                 else if (MSG_IS_SPAM(info->flags))
390                                         /* correct bogofilter, this wasn't spam */
391                                         cmd = g_strdup_printf("%s -Sn -I '%s'", bogo_exec, file);
392                                 else 
393                                         /* learn as ham */
394                                         cmd = g_strdup_printf("%s -n -I '%s'", bogo_exec, file);
395
396                                 if ((status = execute_command_line(cmd, FALSE)) != 0)
397                                         log_error(_("Learning failed; `%s` returned with status %d."),
398                                                         cmd, status);
399
400                                 g_free(cmd);
401                                 g_free(file);
402                                 done++;
403                                 if (message_callback != NULL)
404                                         message_callback(NULL, total, done);
405                         }
406                 } else if (some_correction || some_no_correction) {
407                         cur = msglist;
408                         
409                         gchar *bogo_args[4];
410                         GPid bogo_pid;
411                         gint bogo_stdin;
412                         GError *error = NULL;
413                         gboolean bogo_forked;
414
415                         bogo_args[0] = (gchar *)bogo_exec;
416                         if (some_correction && !some_no_correction)
417                                 bogo_args[1] = "-Sn";
418                         else if (some_no_correction && !some_correction)
419                                 bogo_args[1] = spam ? "-s":"-n";
420                         bogo_args[2] = "-b";
421                         bogo_args[3] = NULL;
422
423                         bogo_forked = g_spawn_async_with_pipes(
424                                         NULL, bogo_args,NULL, G_SPAWN_SEARCH_PATH|G_SPAWN_DO_NOT_REAP_CHILD,
425                                         NULL, NULL, &bogo_pid, &bogo_stdin,
426                                         NULL, NULL, &error);
427
428                         while (bogo_forked && cur) {
429                                 gchar *tmp = NULL;
430                                 info = (MsgInfo *)cur->data;
431                                 file = procmsg_get_message_file(info);
432                                 if (file) {
433                                         tmp = g_strdup_printf("%s\n", 
434                                                 file);
435                                         write_all(bogo_stdin, tmp, strlen(tmp));
436                                         g_free(tmp);
437                                 }
438                                 g_free(file);
439                                 done++;
440                                 if (message_callback != NULL)
441                                         message_callback(NULL, total, done);
442                                 cur = cur->next;
443                         }
444                         if (bogo_forked) {
445                                 close(bogo_stdin);
446                                 waitpid(bogo_pid, &status, 0);
447                                 if (!WIFEXITED(status))
448                                         status = -1;
449                                 else
450                                         status = WEXITSTATUS(status);
451                         }
452                         if (!bogo_forked || status != 0) {
453                                 log_error(_("Learning failed; `%s %s %s` returned with error:\n%s"),
454                                                 bogo_args[0], bogo_args[1], bogo_args[2], 
455                                                 error ? error->message:_("Unknown error"));
456                                 if (error)
457                                         g_error_free(error);
458                         }
459
460                 }
461
462                 if (message_callback != NULL)
463                         message_callback(NULL, 0, 0);
464                 return 0;
465         }
466         return -1;
467 }
468
469 void bogofilter_save_config(void)
470 {
471         PrefFile *pfile;
472         gchar *rcpath;
473
474         debug_print("Saving Bogofilter Page\n");
475
476         rcpath = g_strconcat(get_rc_dir(), G_DIR_SEPARATOR_S, COMMON_RC, NULL);
477         pfile = prefs_write_open(rcpath);
478         g_free(rcpath);
479         if (!pfile || (prefs_set_block_label(pfile, "Bogofilter") < 0))
480                 return;
481
482         if (prefs_write_param(param, pfile->fp) < 0) {
483                 g_warning("Failed to write Bogofilter configuration to file\n");
484                 prefs_file_close_revert(pfile);
485                 return;
486         }
487         fprintf(pfile->fp, "\n");
488
489         prefs_file_close(pfile);
490 }
491
492 void bogofilter_set_message_callback(MessageCallback callback)
493 {
494         message_callback = callback;
495 }
496
497 gint plugin_init(gchar **error)
498 {
499         gchar *rcpath;
500
501         hook_id = -1;
502
503         if ((sylpheed_get_version() > VERSION_NUMERIC)) {
504                 *error = g_strdup(_("Your version of Claws Mail is newer than the version the Bogofilter plugin was built with"));
505                 return -1;
506         }
507
508         if ((sylpheed_get_version() < MAKE_NUMERIC_VERSION(0, 9, 3, 86))) {
509                 *error = g_strdup(_("Your version of Claws Mail is too old for the Bogofilter plugin"));
510                 return -1;
511         }
512
513         prefs_set_default(param);
514         rcpath = g_strconcat(get_rc_dir(), G_DIR_SEPARATOR_S, COMMON_RC, NULL);
515         prefs_read_config(param, "Bogofilter", rcpath, NULL);
516         g_free(rcpath);
517
518         bogofilter_gtk_init();
519                 
520         debug_print("Bogofilter plugin loaded\n");
521
522         if (config.process_emails) {
523                 bogofilter_register_hook();
524         }
525
526         procmsg_register_spam_learner(bogofilter_learn);
527         procmsg_spam_set_folder(config.save_folder);
528
529         return 0;
530         
531 }
532
533 void plugin_done(void)
534 {
535         if (hook_id != -1) {
536                 bogofilter_unregister_hook();
537         }
538         g_free(config.save_folder);
539         bogofilter_gtk_done();
540         procmsg_unregister_spam_learner(bogofilter_learn);
541         procmsg_spam_set_folder(NULL);
542         debug_print("Bogofilter plugin unloaded\n");
543 }
544
545 const gchar *plugin_name(void)
546 {
547         return _("Bogofilter");
548 }
549
550 const gchar *plugin_desc(void)
551 {
552         return _("This plugin can check all messages that are received from an "
553                  "IMAP, LOCAL or POP account for spam using Bogofilter. "
554                  "You will need Bogofilter installed locally.\n "
555                  "\n"
556                  "Before Bogofilter can recognize spam messages, you have to "
557                  "train it by marking a few hundred spam and ham messages. "
558                  "Use \"/Mark/Mark as Spam\" and \"/Mark/Mark as ham\" to "
559                  "train Bogofilter.\n"
560                  "\n"
561                  "When a message is identified as spam it can be deleted or "
562                  "saved in a specially designated folder.\n"
563                  "\n"
564                  "Options can be found in /Configuration/Preferences/Plugins/Bogofilter");
565 }
566
567 const gchar *plugin_type(void)
568 {
569         return "GTK2";
570 }
571
572 const gchar *plugin_licence(void)
573 {
574         return "GPL";
575 }
576
577 const gchar *plugin_version(void)
578 {
579         return VERSION;
580 }
581
582 struct PluginFeature *plugin_provides(void)
583 {
584         static struct PluginFeature features[] = 
585                 { {PLUGIN_FILTERING, N_("Spam detection")},
586                   {PLUGIN_FILTERING, N_("Spam learning")},
587                   {PLUGIN_NOTHING, NULL}};
588         return features;
589 }
590
591 void bogofilter_register_hook(void)
592 {
593         if (hook_id == -1)
594                 hook_id = hooks_register_hook(MAIL_LISTFILTERING_HOOKLIST, mail_filtering_hook, NULL);
595         if (hook_id == -1) {
596                 g_warning("Failed to register mail filtering hook");
597                 config.process_emails = FALSE;
598         }
599 }
600
601 void bogofilter_unregister_hook(void)
602 {
603         if (hook_id != -1) {
604                 hooks_unregister_hook(MAIL_LISTFILTERING_HOOKLIST, hook_id);
605         }
606         hook_id = -1;
607 }