e38ef19c9c633b4011482ab3363dd62a98ff3ca7
[claws.git] / src / plugins / bogofilter / bogofilter.c
1 /*
2  * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3  * Copyright (C) 1999-2006 Hiroyuki Yamamoto and the Sylpheed-Claws Team
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18  */
19
20 #ifdef HAVE_CONFIG_H
21 #  include "config.h"
22 #endif
23
24 #include "defs.h"
25
26 #include <sys/types.h>
27 #include <sys/wait.h>
28
29 #include <glib.h>
30 #include <glib/gi18n.h>
31
32 #if HAVE_LOCALE_H
33 #  include <locale.h>
34 #endif
35
36 #include "common/sylpheed.h"
37 #include "common/version.h"
38 #include "plugin.h"
39 #include "common/utils.h"
40 #include "hooks.h"
41 #include "procmsg.h"
42 #include "folder.h"
43 #include "prefs.h"
44 #include "prefs_gtk.h"
45
46 #include "bogofilter.h"
47 #include "inc.h"
48 #include "log.h"
49 #include "prefs_common.h"
50 #include "alertpanel.h"
51
52 #ifdef HAVE_SYSEXITS_H
53 #include <sysexits.h>
54 #endif
55 #ifdef HAVE_ERRNO_H
56 #include <errno.h>
57 #endif
58 #ifdef HAVE_SYS_ERRNO_H
59 #include <sys/errno.h>
60 #endif
61 #ifdef HAVE_TIME_H
62 #include <time.h>
63 #endif
64 #ifdef HAVE_SYS_TIME_H
65 #include <sys/time.h>
66 #endif
67 #ifdef HAVE_SIGNAL_H
68 #include <signal.h>
69 #endif
70 #ifdef HAVE_PWD_H
71 #include <pwd.h>
72 #endif
73
74 #define MAILS_PER_BATCH 20
75
76 static guint hook_id = -1;
77 static MessageCallback message_callback;
78
79 static BogofilterConfig config;
80
81 static PrefParam param[] = {
82         {"process_emails", "TRUE", &config.process_emails, P_BOOL,
83          NULL, NULL, NULL},
84         {"receive_spam", "TRUE", &config.receive_spam, P_BOOL,
85          NULL, NULL, NULL},
86         {"save_folder", NULL, &config.save_folder, P_STRING,
87          NULL, NULL, NULL},
88         {"max_size", "250", &config.max_size, P_INT,
89          NULL, NULL, NULL},
90         {"bogopath", "bogofilter", &config.bogopath, P_STRING,
91          NULL, NULL, NULL},
92
93         {NULL, NULL, NULL, P_OTHER, NULL, NULL, NULL}
94 };
95
96 /*
97  * Helper function for spawn_with_input() - write an entire
98  * string to a fd.
99  */
100 static gboolean
101 write_all (int         fd,
102            const char *buf,
103            gsize       to_write)
104 {
105   while (to_write > 0)
106     {
107       gssize count = write (fd, buf, to_write);
108       if (count < 0)
109         {
110           if (errno != EINTR)
111             return FALSE;
112         }
113       else
114         {
115           to_write -= count;
116           buf += count;
117         }
118     }
119
120   return TRUE;
121 }
122
123 static gboolean mail_filtering_hook(gpointer source, gpointer data)
124 {
125         MailFilteringData *mail_filtering_data = (MailFilteringData *) source;
126         MsgInfo *msginfo = mail_filtering_data->msginfo;
127         GSList *msglist = mail_filtering_data->msglist;
128         GSList *cur = NULL;
129         static gboolean warned_error = FALSE;
130         gchar *file = NULL, *cmd = NULL;
131         int status = 0;
132         gchar *bogo_exec = (config.bogopath && *config.bogopath) ? config.bogopath:"bogofilter";
133         int total = 0, curnum = 0;
134         GSList *spams = NULL;
135         gchar buf[BUFSIZ];
136
137         gchar *bogo_args[4];
138         GPid bogo_pid;
139         gint bogo_stdin, bogo_stdout;
140         GError *error = NULL;
141         gboolean bogo_forked;
142
143         if (!config.process_emails) {
144                 return FALSE;
145         }
146         
147         if (msglist == NULL && msginfo != NULL) {
148                 g_warning("wrong call to bogofilter mail_filtering_hook");
149                 return FALSE;
150         }
151         
152         total = g_slist_length(msglist);
153         if (message_callback != NULL)
154                 message_callback(_("Bogofilter: filtering messages..."), total, 0);
155
156         cmd = g_strdup_printf("%s -T -b", bogo_exec);
157
158         bogo_args[0] = bogo_exec;
159         bogo_args[1] = "-T";
160         bogo_args[2] = "-b";
161         bogo_args[3] = NULL;
162
163         bogo_forked = g_spawn_async_with_pipes(
164                         NULL, bogo_args,NULL, G_SPAWN_SEARCH_PATH|G_SPAWN_DO_NOT_REAP_CHILD,
165                         NULL, NULL, &bogo_pid, &bogo_stdin,
166                         &bogo_stdout, NULL, &error);
167                 
168         if (bogo_forked == FALSE) {
169                 g_warning("%s\n", error ? error->message:"ERROR???");
170                 g_error_free(error);
171                 error = NULL;
172                 status = -1;
173         } else {
174                 for (cur = msglist; cur; cur = cur->next) {
175                         msginfo = (MsgInfo *)cur->data;
176                         debug_print("Filtering message %d (%d/%d)\n", msginfo->msgnum, curnum, total);
177
178                         if (message_callback != NULL)
179                                 message_callback(NULL, total, curnum++);
180
181                         file = procmsg_get_message_file(msginfo);
182
183                         if (file) {
184                                 gchar *tmp = g_strdup_printf("%s\n",file);
185                                 write_all(bogo_stdin, tmp, strlen(tmp));
186                                 g_free(tmp);
187                                 memset(buf, 0, sizeof(buf));
188                                 if (read(bogo_stdout, buf, sizeof(buf)-1) < 0) {
189                                         printf("ERROR 2\n");
190                                 } else {
191                                         gchar **parts = NULL;
192                                         if (strchr(buf, '/')) {
193                                                 tmp = strrchr(buf, '/')+1;
194                                         } else {
195                                                 tmp = buf;
196                                         }
197                                         parts = g_strsplit(tmp, " ", 0);
198                                         debug_print("read %s\n", buf);
199                                         if (parts && parts[0] && parts[1] && *parts[1] == 'S') {
200                                                 debug_print("message %d is spam\n", msginfo->msgnum);
201                                                 procmsg_msginfo_set_flags(msginfo, MSG_SPAM, 0);
202                                                 if (config.receive_spam) {
203                                                         procmsg_msginfo_unset_flags(msginfo, ~0, 0);
204                                                         procmsg_msginfo_set_flags(msginfo, MSG_SPAM, 0);
205                                                         spams = g_slist_prepend(spams, msginfo);
206                                                 } else {
207                                                         folder_item_remove_msg(msginfo->folder, msginfo->msgnum);
208                                                 }
209                                                 mail_filtering_data->filtered = g_slist_prepend(
210                                                         mail_filtering_data->filtered, msginfo);
211                                         } else {
212                                                 debug_print("message %d is ham\n", msginfo->msgnum);
213                                                 procmsg_msginfo_unset_flags(msginfo, MSG_SPAM, 0);
214                                                 mail_filtering_data->unfiltered = g_slist_prepend(
215                                                         mail_filtering_data->unfiltered, msginfo);
216                                         }
217                                         g_strfreev(parts);
218                                 }
219                                 g_free(file);
220                         } else {
221                                 mail_filtering_data->unfiltered = g_slist_prepend(
222                                         mail_filtering_data->unfiltered, msginfo);
223                         }
224                 }
225         }
226         
227         if (status != -1) {
228                 close(bogo_stdout);
229                 close(bogo_stdin);
230                 waitpid(bogo_pid, &status, 0);
231                 if (!WIFEXITED(status))
232                         status = -1;
233                 else
234                         status = WEXITSTATUS(status);
235         } 
236
237         if (status < 0 || status > 2) { /* I/O or other errors */
238                 gchar *msg = NULL;
239                 
240                 if (status == 3)
241                         msg =  g_strdup_printf(_("The Bogofilter plugin couldn't filter "
242                                            "a message. The probable cause of the "
243                                            "error is that it didn't learn from any mail.\n"
244                                            "Use \"/Mark/Mark as spam\" and \"/Mark/Mark as "
245                                            "ham\" to train Bogofilter with a few hundred "
246                                            "spam and ham messages."));
247                 else
248                         msg =  g_strdup_printf(_("The Bogofilter plugin couldn't filter "
249                                            "a message. the command `%s` couldn't be run."), cmd);
250                 if (!prefs_common.no_recv_err_panel) {
251                         if (!warned_error) {
252                                 alertpanel_error(msg);
253                         }
254                         warned_error = TRUE;
255                 } else {
256                         gchar *tmp = g_strdup_printf("%s\n", msg);
257                         log_error(tmp);
258                         g_free(tmp);
259                 }
260                 g_free(msg);
261         }
262         if (status < 0 || status > 2) {
263                 g_slist_free(mail_filtering_data->filtered);
264                 g_slist_free(mail_filtering_data->unfiltered);
265                 g_slist_free(spams);
266                 mail_filtering_data->filtered = NULL;
267                 mail_filtering_data->unfiltered = NULL;
268         } else if (config.receive_spam && spams) {
269                 FolderItem *save_folder;
270
271                 if ((!config.save_folder) ||
272                     (config.save_folder[0] == '\0') ||
273                     ((save_folder = folder_find_item_from_identifier(config.save_folder)) == NULL))
274                         save_folder = folder_get_default_trash();
275
276                 folder_item_move_msgs(save_folder, spams);
277         } 
278
279         if (message_callback != NULL)
280                 message_callback(NULL, 0, 0);
281         mail_filtering_data->filtered = g_slist_reverse(
282                 mail_filtering_data->filtered);
283         mail_filtering_data->unfiltered = g_slist_reverse(
284                 mail_filtering_data->unfiltered);
285         
286         return FALSE;
287 }
288
289 BogofilterConfig *bogofilter_get_config(void)
290 {
291         return &config;
292 }
293
294 int bogofilter_learn(MsgInfo *msginfo, GSList *msglist, gboolean spam)
295 {
296         gchar *cmd = NULL;
297         gchar *file = NULL;
298         const gchar *bogo_exec = (config.bogopath && *config.bogopath) ? config.bogopath:"bogofilter";
299         gint status = 0;
300         if (msginfo == NULL && msglist == NULL) {
301                 return -1;
302         }
303
304         if (msginfo) {
305                 file = procmsg_get_message_file(msginfo);
306                 if (file == NULL) {
307                         return -1;
308                 } else {
309                         if (message_callback != NULL)
310                                 message_callback(_("Bogofilter: learning from message..."), 0, 0);
311                         if (spam)
312                                 /* learn as spam */
313                                 cmd = g_strdup_printf("%s -s -I '%s'", bogo_exec, file);
314                         else if (MSG_IS_SPAM(msginfo->flags))
315                                 /* correct bogofilter, this wasn't spam */
316                                 cmd = g_strdup_printf("%s -Sn -I '%s'", bogo_exec, file);
317                         else 
318                                 /* learn as ham */
319                                 cmd = g_strdup_printf("%s -n -I '%s'", bogo_exec, file);
320                         if ((status = execute_command_line(cmd, FALSE)) != 0)
321                                 alertpanel_error(_("Learning failed; `%s` returned with status %d."),
322                                                 cmd, status);
323                         g_free(cmd);
324                         g_free(file);
325                         if (message_callback != NULL)
326                                 message_callback(NULL, 0, 0);
327                         return 0;
328                 }
329         }
330         if (msglist) {
331                 GSList *cur = msglist;
332                 MsgInfo *info;
333                 int total = g_slist_length(msglist);
334                 int done = 0;
335                 gboolean some_correction = FALSE, some_no_correction = FALSE;
336         
337                 if (message_callback != NULL)
338                         message_callback(_("Bogofilter: learning from messages..."), total, 0);
339                 
340                 for (cur = msglist; cur && status == 0; cur = cur->next) {
341                         info = (MsgInfo *)cur->data;
342                         if (spam)
343                                 some_no_correction = TRUE;
344                         else if (MSG_IS_SPAM(info->flags))
345                                 /* correct bogofilter, this wasn't spam */
346                                 some_correction = TRUE;
347                         else 
348                                 some_no_correction = TRUE;
349                         
350                 }
351                 
352                 if (some_correction && some_no_correction) {
353                         /* we potentially have to do different stuff for every mail */
354                         for (cur = msglist; cur && status == 0; cur = cur->next) {
355                                 info = (MsgInfo *)cur->data;
356                                 file = procmsg_get_message_file(info);
357
358                                 if (spam)
359                                         /* learn as spam */
360                                         cmd = g_strdup_printf("%s -s -I '%s'", bogo_exec, file);
361                                 else if (MSG_IS_SPAM(info->flags))
362                                         /* correct bogofilter, this wasn't spam */
363                                         cmd = g_strdup_printf("%s -Sn -I '%s'", bogo_exec, file);
364                                 else 
365                                         /* learn as ham */
366                                         cmd = g_strdup_printf("%s -n -I '%s'", bogo_exec, file);
367
368                                 if ((status = execute_command_line(cmd, FALSE)) != 0)
369                                         alertpanel_error(_("Learning failed; `%s` returned with status %d."),
370                                                         cmd, status);
371
372                                 g_free(cmd);
373                                 g_free(file);
374                                 done++;
375                                 if (message_callback != NULL)
376                                         message_callback(NULL, total, done);
377                         }
378                 } else if (some_correction || some_no_correction) {
379                         cur = msglist;
380                         
381                         gchar *bogo_args[4];
382                         GPid bogo_pid;
383                         gint bogo_stdin;
384                         GError *error = NULL;
385                         gboolean bogo_forked;
386
387                         bogo_args[0] = (gchar *)bogo_exec;
388                         if (some_correction && !some_no_correction)
389                                 bogo_args[1] = "-Sn";
390                         else if (some_no_correction && !some_correction)
391                                 bogo_args[1] = spam ? "-s":"-n";
392                         bogo_args[2] = "-b";
393                         bogo_args[3] = NULL;
394
395                         bogo_forked = g_spawn_async_with_pipes(
396                                         NULL, bogo_args,NULL, G_SPAWN_SEARCH_PATH|G_SPAWN_DO_NOT_REAP_CHILD,
397                                         NULL, NULL, &bogo_pid, &bogo_stdin,
398                                         NULL, NULL, &error);
399
400                         while (bogo_forked && cur) {
401                                 gchar *tmp = NULL;
402                                 info = (MsgInfo *)cur->data;
403                                 file = procmsg_get_message_file(info);
404                                 if (file) {
405                                         tmp = g_strdup_printf("%s\n", 
406                                                 file);
407                                         write_all(bogo_stdin, tmp, strlen(tmp));
408                                         g_free(tmp);
409                                 }
410                                 g_free(file);
411                                 done++;
412                                 if (message_callback != NULL)
413                                         message_callback(NULL, total, done);
414                                 cur = cur->next;
415                         }
416                         if (bogo_forked) {
417                                 close(bogo_stdin);
418                                 waitpid(bogo_pid, &status, 0);
419                                 if (!WIFEXITED(status))
420                                         status = -1;
421                                 else
422                                         status = WEXITSTATUS(status);
423                         }
424                         if (!bogo_forked || status != 0) {
425                                 alertpanel_error(_("Learning failed; `%s` returned with error:\n%s"),
426                                                 cmd, error ? error->message:_("Unknown error"));
427                                 if (error)
428                                         g_error_free(error);
429                         }
430
431                 }
432
433                 if (message_callback != NULL)
434                         message_callback(NULL, 0, 0);
435                 return 0;
436         }
437         return -1;
438 }
439
440 void bogofilter_save_config(void)
441 {
442         PrefFile *pfile;
443         gchar *rcpath;
444
445         debug_print("Saving Bogofilter Page\n");
446
447         rcpath = g_strconcat(get_rc_dir(), G_DIR_SEPARATOR_S, COMMON_RC, NULL);
448         pfile = prefs_write_open(rcpath);
449         g_free(rcpath);
450         if (!pfile || (prefs_set_block_label(pfile, "Bogofilter") < 0))
451                 return;
452
453         if (prefs_write_param(param, pfile->fp) < 0) {
454                 g_warning("Failed to write Bogofilter configuration to file\n");
455                 prefs_file_close_revert(pfile);
456                 return;
457         }
458         fprintf(pfile->fp, "\n");
459
460         prefs_file_close(pfile);
461 }
462
463 void bogofilter_set_message_callback(MessageCallback callback)
464 {
465         message_callback = callback;
466 }
467
468 gint plugin_init(gchar **error)
469 {
470         gchar *rcpath;
471
472         hook_id = -1;
473
474         if ((sylpheed_get_version() > VERSION_NUMERIC)) {
475                 *error = g_strdup("Your version of Sylpheed-Claws is newer than the version the Bogofilter plugin was built with");
476                 return -1;
477         }
478
479         if ((sylpheed_get_version() < MAKE_NUMERIC_VERSION(0, 9, 3, 86))) {
480                 *error = g_strdup("Your version of Sylpheed-Claws is too old for the Bogofilter plugin");
481                 return -1;
482         }
483
484         prefs_set_default(param);
485         rcpath = g_strconcat(get_rc_dir(), G_DIR_SEPARATOR_S, COMMON_RC, NULL);
486         prefs_read_config(param, "Bogofilter", rcpath, NULL);
487         g_free(rcpath);
488
489         bogofilter_gtk_init();
490                 
491         debug_print("Bogofilter plugin loaded\n");
492
493         if (config.process_emails) {
494                 bogofilter_register_hook();
495         }
496
497         procmsg_register_spam_learner(bogofilter_learn);
498         procmsg_spam_set_folder(config.save_folder);
499
500         return 0;
501         
502 }
503
504 void plugin_done(void)
505 {
506         if (hook_id != -1) {
507                 bogofilter_unregister_hook();
508         }
509         g_free(config.save_folder);
510         bogofilter_gtk_done();
511         procmsg_unregister_spam_learner(bogofilter_learn);
512         procmsg_spam_set_folder(NULL);
513         debug_print("Bogofilter plugin unloaded\n");
514 }
515
516 const gchar *plugin_name(void)
517 {
518         return _("Bogofilter");
519 }
520
521 const gchar *plugin_desc(void)
522 {
523         return _("This plugin can check all messages that are received from an "
524                  "IMAP, LOCAL or POP account for spam using Bogofilter. "
525                  "You will need Bogofilter installed locally.\n "
526                  "\n"
527                  "Before Bogofilter can recognize spam messages, you have to "
528                  "train it by marking a few hundred spam and ham messages. "
529                  "Use \"/Mark/Mark as Spam\" and \"/Mark/Mark as ham\" to "
530                  "train Bogofilter.\n"
531                  "\n"
532                  "When a message is identified as spam it can be deleted or "
533                  "saved in a specially designated folder.\n"
534                  "\n"
535                  "Options can be found in /Configuration/Preferences/Plugins/Bogofilter");
536 }
537
538 const gchar *plugin_type(void)
539 {
540         return "GTK2";
541 }
542
543 const gchar *plugin_licence(void)
544 {
545         return "GPL";
546 }
547
548 const gchar *plugin_version(void)
549 {
550         return VERSION;
551 }
552
553 struct PluginFeature *plugin_provides(void)
554 {
555         static struct PluginFeature features[] = 
556                 { {PLUGIN_FILTERING, N_("Spam detection")},
557                   {PLUGIN_FILTERING, N_("Spam learning")},
558                   {PLUGIN_NOTHING, NULL}};
559         return features;
560 }
561
562 void bogofilter_register_hook(void)
563 {
564         hook_id = hooks_register_hook(MAIL_LISTFILTERING_HOOKLIST, mail_filtering_hook, NULL);
565         if (hook_id == -1) {
566                 g_warning("Failed to register mail filtering hook");
567                 config.process_emails = FALSE;
568         }
569 }
570
571 void bogofilter_unregister_hook(void)
572 {
573         if (hook_id != -1) {
574                 hooks_unregister_hook(MAIL_LISTFILTERING_HOOKLIST, hook_id);
575         }
576 }