2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 1999-2006 Hiroyuki Yamamoto and the Claws Mail Team
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
26 #include <sys/types.h>
31 #include <glib/gi18n.h>
37 #include "common/claws.h"
38 #include "common/version.h"
40 #include "common/utils.h"
45 #include "prefs_gtk.h"
47 #include "bogofilter.h"
50 #include "prefs_common.h"
51 #include "alertpanel.h"
52 #include "addr_compl.h"
54 #ifdef HAVE_SYSEXITS_H
60 #ifdef HAVE_SYS_ERRNO_H
61 #include <sys/errno.h>
66 #ifdef HAVE_SYS_TIME_H
/* Display name of the plugin, passed through gettext via _(). */
76 #define PLUGIN_NAME (_("Bogofilter"))
/* Id assigned from hooks_register_hook() in bogofilter_register_hook().
 * The initial -1 is stored in an unsigned guint, so it wraps to the
 * largest guint value. */
78 static guint hook_id = -1;
/* Optional progress reporter installed by bogofilter_set_message_callback();
 * every call site in this file checks it against NULL first. */
79 static MessageCallback message_callback;
/* Plugin settings; param[] below binds its fields to preference keys. */
81 static BogofilterConfig config;
/* Preference table. Each visible row gives the key name, the default value
 * as a string, the config field that receives the value and the value type.
 * The trailing members of each row are not shown in this listing. */
83 static PrefParam param[] = {
84 {"process_emails", "TRUE", &config.process_emails, P_BOOL,
86 {"receive_spam", "TRUE", &config.receive_spam, P_BOOL,
88 {"save_folder", NULL, &config.save_folder, P_STRING,
90 {"max_size", "250", &config.max_size, P_INT,
92 {"bogopath", "bogofilter", &config.bogopath, P_STRING,
94 {"insert_header", "FALSE", &config.insert_header, P_BOOL,
96 {"whitelist_ab", "TRUE", &config.whitelist_ab, P_BOOL,
98 {"whitelist_ab_folder", "Any", &config.whitelist_ab_folder, P_STRING,
/* All-NULL row; presumably the terminator the prefs code stops at -- confirm. */
101 {NULL, NULL, NULL, P_OTHER, NULL, NULL, NULL}
/* Fragment of the write helper (presumably write_all(), which the filtering
 * and learning code call with a descriptor, a buffer and a length -- confirm).
 * The visible statement issues one write(2) of to_write bytes from buf to fd
 * and keeps the result in count; how a partial write is handled is not
 * shown in this listing. */
105 * Helper function for spawn_with_input() - write an entire
115 gssize count = write (fd, buf, to_write);
/* Work item describing one filtering run. Besides the two members visible
 * here, the code in this file also uses msglist, new_hams, new_spams, done,
 * status, bogo_args and in_thread on this type. */
131 typedef struct _BogoFilterData {
/* Hook payload; bogofilter_do_filter() prepends to its filtered and
 * unfiltered lists. */
132 MailFilteringData *mail_filtering_data;
/* Messages classified as spam whose sender matched the address book. */
137 GSList *whitelisted_new_spams;
/* Current work item; allocated and freed by mail_filtering_hook() and read
 * by the filtering thread. */
143 static BogoFilterData *to_filter_data = NULL;
/* Loop-exit flag for bogofilter_filtering_thread(); set to TRUE by
 * bogofilter_stop_thread(). */
145 static gboolean filter_th_done = FALSE;
/* Taken by the hook, the thread and stop_thread around to_filter_data. */
146 static pthread_mutex_t list_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Mutex/condition pair used to park the filtering thread and wake it up. */
147 static pthread_mutex_t wait_mutex = PTHREAD_MUTEX_INITIALIZER;
148 static pthread_cond_t wait_cond = PTHREAD_COND_INITIALIZER;
/*
 * Look an address up through the address-completion API.
 *
 * address: header value to test; it is duplicated before being modified.
 *
 * The copy is passed through extract_address(), complete_address() yields the
 * number of candidates, and each candidate is run through extract_address()
 * and compared case-insensitively against the copy. The loop stops at the
 * first match. The declarations of addr, num_addr and i, the statement that
 * sets found, the cleanup and the return are not shown in this listing;
 * presumably found is returned -- confirm.
 */
151 static gboolean found_in_addressbook(const gchar *address)
154 gboolean found = FALSE;
160 addr = g_strdup(address);
161 extract_address(addr);
162 num_addr = complete_address(addr);
164 /* skip first item (this is the search string itself) */
166 for (; i < num_addr && !found; i++) {
167 gchar *caddr = get_complete_address(i);
168 extract_address(caddr);
169 if (strcasecmp(caddr, addr) == 0)
/*
 * Classify the messages in data->msglist with one spawned bogofilter process.
 *
 * data: work item; its msglist, bogo_args and in_thread members are read and
 *       its new_hams, new_spams and whitelisted_new_spams lists as well as
 *       mail_filtering_data->filtered / ->unfiltered are prepended to.
 *
 * Visible flow:
 *  - spawn data->bogo_args with pipes on stdin and stdout and warn when the
 *    spawn fails;
 *  - when config.whitelist_ab is set, start address completion (NULL folder
 *    when the configured folder is empty or equals the translated "Any") and
 *    end it after the loop;
 *  - per message: report progress through message_callback, test the From
 *    header with found_in_addressbook(), write the message file name plus a
 *    newline to bogofilter and read its reply with a single read();
 *  - split the reply on spaces; a second field starting with 'S' means spam,
 *    'H' means ham, anything else is reported as "Unsure";
 *  - when config.insert_header is set and the folder type is F_MH, copy the
 *    message to a temporary file behind an X-Claws-Bogosity header line and
 *    move it over the original;
 *  - sort the message into the spam, whitelisted-spam or ham lists;
 *  - reap the child with waitpid() and record its exit status.
 *
 * NOTE(review): the final status is stored through the global to_filter_data
 * rather than through the data parameter -- confirm both always refer to the
 * same object.
 * NOTE(review): several lines of this function (declarations, else branches,
 * cleanup) are not shown in this listing.
 */
178 static void bogofilter_do_filter(BogoFilterData *data)
181 gint bogo_stdin, bogo_stdout;
182 GError *error = NULL;
183 gboolean bogo_forked;
187 int total = 0, curnum = 0;
191 total = g_slist_length(data->msglist);
/* The child is searched on PATH and is not reaped automatically, which is
 * why waitpid() is called at the end. */
193 bogo_forked = g_spawn_async_with_pipes(
194 NULL, data->bogo_args,NULL, G_SPAWN_SEARCH_PATH|G_SPAWN_DO_NOT_REAP_CHILD,
195 NULL, NULL, &bogo_pid, &bogo_stdin,
196 &bogo_stdout, NULL, &error);
198 if (bogo_forked == FALSE) {
199 g_warning("%s\n", error ? error->message:"ERROR???");
205 if (config.whitelist_ab) {
206 gchar *ab_folderpath;
208 if (*config.whitelist_ab_folder == '\0' ||
209 strcasecmp(config.whitelist_ab_folder, _("Any")) == 0) {
210 /* match the whole addressbook */
211 ab_folderpath = NULL;
213 /* match the specific book/folder of the addressbook */
214 ab_folderpath = config.whitelist_ab_folder;
217 start_address_completion(ab_folderpath);
220 for (cur = data->msglist; cur; cur = cur->next) {
221 gboolean whitelisted = FALSE;
222 msginfo = (MsgInfo *)cur->data;
223 debug_print("Filtering message %d (%d/%d)\n", msginfo->msgnum, curnum, total);
225 if (message_callback != NULL)
226 message_callback(NULL, total, curnum++, data->in_thread);
/* Presumably sets whitelisted when the sender is known -- the guarded
 * statement is not shown in this listing. */
228 if (config.whitelist_ab && msginfo->from &&
229 found_in_addressbook(msginfo->from))
232 /* can set flags (SCANNED, ATTACHMENT) but that's ok
233 * as GUI updates are hooked not direct */
235 file = procmsg_get_message_file(msginfo);
238 gchar *tmp = g_strdup_printf("%s\n",file);
239 /* send filename to bogofilter */
240 write_all(bogo_stdin, tmp, strlen(tmp));
242 memset(buf, 0, sizeof(buf));
/* NOTE(review): only a negative return counts as failure here although the
 * warning says "short read"; a failed read makes the message count as ham. */
244 if (read(bogo_stdout, buf, sizeof(buf)-1) < 0) {
245 g_warning("bogofilter short read\n");
246 debug_print("message %d is ham\n", msginfo->msgnum);
247 data->mail_filtering_data->unfiltered = g_slist_prepend(
248 data->mail_filtering_data->unfiltered, msginfo);
249 data->new_hams = g_slist_prepend(data->new_hams, msginfo);
251 gchar **parts = NULL;
/* Drop everything up to the last '/' so that the split starts at the bare
 * file name. */
252 if (strchr(buf, '/')) {
253 tmp = strrchr(buf, '/')+1;
257 parts = g_strsplit(tmp, " ", 0);
258 debug_print("read %s\n", buf);
260 /* note the result in the header if needed */
261 if (parts && parts[0] && parts[1] && parts[2] &&
262 FOLDER_TYPE(msginfo->folder->folder) == F_MH &&
263 config.insert_header) {
264 gchar *tmpfile = get_tmp_file();
265 FILE *input = fopen(file, "r");
266 FILE *output = fopen(tmpfile, "w");
/* Cut the third field at its first newline. */
267 if (strstr(parts[2], "\n"))
268 *(strstr(parts[2], "\n")) = '\0';
269 if (input && !output)
271 else if (!input && output)
274 gchar tmpbuf[BUFFSIZE];
275 const gchar *bogosity = *parts[1] == 'S' ? "Spam":
276 (*parts[1] == 'H' ? "Ham":"Unsure");
277 gchar *tmpstr = g_strdup_printf(
278 "X-Claws-Bogosity: %s, spamicity=%s%s\n",
280 whitelisted?" [whitelisted]":"");
281 fwrite(tmpstr, 1, strlen(tmpstr), output);
/* NOTE(review): the fgets bound is sizeof(buf) although the destination is
 * tmpbuf -- confirm tmpbuf is at least as large as buf. */
282 while (fgets(tmpbuf, sizeof(buf), input))
283 fputs(tmpbuf, output);
286 move_file(tmpfile, file, TRUE);
293 if (!whitelisted && parts && parts[0] && parts[1] && *parts[1] == 'S') {
295 debug_print("message %d is spam\n", msginfo->msgnum);
296 /* Spam will be filtered away */
297 data->mail_filtering_data->filtered = g_slist_prepend(
298 data->mail_filtering_data->filtered, msginfo);
299 data->new_spams = g_slist_prepend(data->new_spams, msginfo);
301 } else if (whitelisted && parts && parts[0] && parts[1] && *parts[1] == 'S') {
303 debug_print("message %d is whitelisted spam\n", msginfo->msgnum);
304 /* Whitelisted spam will *not* be filtered away, but continue
305 * their trip through filtering as if it was ham. */
306 data->mail_filtering_data->unfiltered = g_slist_prepend(
307 data->mail_filtering_data->unfiltered, msginfo);
308 /* But it gets put in a different list, so that we
309 * can still flag it and inform the user that it is
310 * considered a spam (so that he can teach bogo that
312 data->whitelisted_new_spams = g_slist_prepend(data->whitelisted_new_spams, msginfo);
316 debug_print("message %d is ham\n", msginfo->msgnum);
317 data->mail_filtering_data->unfiltered = g_slist_prepend(
318 data->mail_filtering_data->unfiltered, msginfo);
319 data->new_hams = g_slist_prepend(data->new_hams, msginfo);
326 data->mail_filtering_data->unfiltered = g_slist_prepend(
327 data->mail_filtering_data->unfiltered, msginfo);
328 data->new_hams = g_slist_prepend(data->new_hams, msginfo);
331 if (config.whitelist_ab)
332 end_address_completion();
337 waitpid(bogo_pid, &status, 0);
338 if (!WIFEXITED(status))
341 status = WEXITSTATUS(status);
344 to_filter_data->status = status;
/*
 * Body of the background filtering thread.
 *
 * data: unused in the visible lines.
 *
 * Loops until filter_th_done is set. Each pass takes list_mutex. When there
 * is no work item, or the current one is already done, it releases
 * list_mutex and blocks on wait_cond. Otherwise (the branch separator is not
 * shown in this listing) it runs bogofilter_do_filter() on to_filter_data
 * while holding list_mutex.
 *
 * NOTE(review): done is set to TRUE after list_mutex has been released, while
 * mail_filtering_hook() polls done and later frees to_filter_data -- confirm
 * this write cannot touch a freed work item.
 * NOTE(review): pthread_cond_wait() is not wrapped in its own predicate loop;
 * a spurious wakeup is only absorbed by the outer while re-check.
 */
348 static void *bogofilter_filtering_thread(void *data)
350 while (!filter_th_done) {
351 pthread_mutex_lock(&list_mutex);
352 if (to_filter_data == NULL || to_filter_data->done == TRUE) {
353 pthread_mutex_unlock(&list_mutex);
354 debug_print("thread is waiting for something to filter\n");
355 pthread_mutex_lock(&wait_mutex);
356 pthread_cond_wait(&wait_cond, &wait_mutex);
357 pthread_mutex_unlock(&wait_mutex);
359 debug_print("thread awaken with something to filter\n");
360 to_filter_data->done = FALSE;
361 bogofilter_do_filter(to_filter_data);
362 pthread_mutex_unlock(&list_mutex);
363 to_filter_data->done = TRUE;
/* Handle of the filtering thread; 0 is used throughout this file to mean
 * that no thread exists. */
370 static pthread_t filter_th = 0;
/*
 * Reset the exit flag and create the filtering thread.
 *
 * NOTE(review): the condition below ends in "|| 1" and is therefore always
 * true. The statement it guards is not shown in this listing; if it is an
 * early return, pthread_create() is never reached and filtering always runs
 * inline -- confirm that this is intentional.
 */
372 static void bogofilter_start_thread(void)
374 filter_th_done = FALSE;
375 if (filter_th != 0 || 1)
377 if (pthread_create(&filter_th, 0,
378 bogofilter_filtering_thread,
383 debug_print("thread created\n");
/*
 * Ask the filtering thread to exit and wait for it.
 *
 * Polls pthread_mutex_trylock() until list_mutex is acquired (the loop body
 * is not shown in this listing). If a thread exists, it sets filter_th_done,
 * broadcasts wait_cond under wait_mutex to wake the thread and joins it.
 * list_mutex is released afterwards.
 *
 * NOTE(review): in the visible lines list_mutex is still held during
 * pthread_join(), while the thread locks list_mutex at the top of every loop
 * pass -- confirm the thread can only be parked in pthread_cond_wait() here.
 */
386 static void bogofilter_stop_thread(void)
389 while (pthread_mutex_trylock(&list_mutex) != 0) {
393 if (filter_th != 0) {
394 filter_th_done = TRUE;
395 debug_print("waking thread up\n");
396 pthread_mutex_lock(&wait_mutex);
397 pthread_cond_broadcast(&wait_cond);
398 pthread_mutex_unlock(&wait_mutex);
399 pthread_join(filter_th, &res);
402 pthread_mutex_unlock(&list_mutex);
403 debug_print("thread done\n");
/*
 * Hook callback registered on MAIL_LISTFILTERING_HOOKLIST.
 *
 * source: the MailFilteringData for this run; its msglist is classified and
 *         its filtered / unfiltered lists are produced.
 * data:   not used in the visible lines.
 *
 * Visible flow:
 *  - checks config.process_emails and warns about a call that carries a
 *    single msginfo but no msglist (the statements taken in those cases are
 *    not shown in this listing);
 *  - fetches every message file up front, reporting progress;
 *  - publishes a BogoFilterData work item in to_filter_data under list_mutex;
 *  - either wakes the filtering thread and waits for done, or calls
 *    bogofilter_do_filter() directly;
 *  - copies the result lists and status out of the work item and frees it;
 *  - updates MSG_SPAM flags, reports errors for a status outside 0..2, and
 *    routes spam to the configured folder or the default trash;
 *  - frees the temporary lists and reverses filtered / unfiltered, which
 *    were built by prepending.
 *
 * The return statements are not shown in this listing.
 */
407 static gboolean mail_filtering_hook(gpointer source, gpointer data)
409 MailFilteringData *mail_filtering_data = (MailFilteringData *) source;
410 MsgInfo *msginfo = mail_filtering_data->msginfo;
411 GSList *msglist = mail_filtering_data->msglist;
/* Static, so its value persists across calls; its use is not shown in this
 * listing. */
413 static gboolean warned_error = FALSE;
415 int total = 0, curnum = 0;
416 GSList *new_hams = NULL, *new_spams = NULL, *whitelisted_new_spams = NULL;
/* Fall back to the bare command name when no path is configured. */
417 gchar *bogo_exec = (config.bogopath && *config.bogopath) ? config.bogopath:"bogofilter";
419 gboolean ok_to_thread = TRUE;
421 bogo_args[0] = bogo_exec;
426 if (!config.process_emails) {
430 if (msglist == NULL && msginfo != NULL) {
431 g_warning("wrong call to bogofilter mail_filtering_hook");
435 total = g_slist_length(msglist);
437 /* we have to make sure the mails are cached - or it'll break on IMAP */
438 if (message_callback != NULL)
439 message_callback(_("Bogofilter: fetching bodies..."), total, 0, FALSE);
440 for (cur = msglist; cur; cur = cur->next) {
441 gchar *file = procmsg_get_message_file((MsgInfo *)cur->data);
/* The condition guarding this assignment is not shown in this listing. */
443 ok_to_thread = FALSE;
444 if (message_callback != NULL)
445 message_callback(NULL, total, curnum++, FALSE);
448 if (message_callback != NULL)
449 message_callback(NULL, 0, 0, FALSE);
451 if (message_callback != NULL)
452 message_callback(_("Bogofilter: filtering messages..."), total, 0, FALSE);
/* Poll for list_mutex instead of blocking; the loop body is not shown. */
455 while (pthread_mutex_trylock(&list_mutex) != 0) {
460 to_filter_data = g_new0(BogoFilterData, 1);
461 to_filter_data->msglist = msglist;
462 to_filter_data->mail_filtering_data = mail_filtering_data;
463 to_filter_data->new_hams = NULL;
464 to_filter_data->new_spams = NULL;
465 to_filter_data->whitelisted_new_spams = NULL;
466 to_filter_data->done = FALSE;
467 to_filter_data->status = -1;
468 to_filter_data->bogo_args = bogo_args;
/* Two alternative assignments; presumably selected by a preprocessor
 * conditional that is not shown in this listing -- confirm. */
470 to_filter_data->in_thread = (filter_th != 0 && ok_to_thread);
472 to_filter_data->in_thread = FALSE;
476 pthread_mutex_unlock(&list_mutex);
478 if (filter_th != 0 && ok_to_thread) {
479 debug_print("waking thread to let it filter things\n");
480 pthread_mutex_lock(&wait_mutex);
481 pthread_cond_broadcast(&wait_cond);
482 pthread_mutex_unlock(&wait_mutex);
/* Wait for the thread to mark the work item as done; the loop body is not
 * shown in this listing. */
484 while (!to_filter_data->done) {
490 while (pthread_mutex_trylock(&list_mutex) != 0) {
/* Inline path used when there is no thread or threading was ruled out. */
495 if (filter_th == 0 || !ok_to_thread)
496 bogofilter_do_filter(to_filter_data);
498 bogofilter_do_filter(to_filter_data);
/* Take ownership of the result lists before the work item is freed. */
501 new_hams = to_filter_data->new_hams;
502 new_spams = to_filter_data->new_spams;
503 whitelisted_new_spams = to_filter_data->whitelisted_new_spams;
504 status = to_filter_data->status;
505 g_free(to_filter_data);
506 to_filter_data = NULL;
508 pthread_mutex_unlock(&list_mutex);
/* Clear the spam flag on everything classified as ham. */
513 for (cur = new_hams; cur; cur = cur->next) {
514 MsgInfo *msginfo = (MsgInfo *)cur->data;
515 procmsg_msginfo_unset_flags(msginfo, MSG_SPAM, 0);
517 /* flag whitelisted spams */
518 for (cur = whitelisted_new_spams; cur; cur = cur->next) {
519 MsgInfo *msginfo = (MsgInfo *)cur->data;
520 procmsg_msginfo_set_flags(msginfo, MSG_SPAM, 0);
522 /* flag spams and delete them if !config.receive_spam
523 * (if config.receive_spam is set, we'll move them later) */
524 for (cur = new_spams; cur; cur = cur->next) {
525 MsgInfo *msginfo = (MsgInfo *)cur->data;
526 if (config.receive_spam) {
/* Presumably sets MSG_SPAM while clearing all other permanent flags (~0);
 * confirm against the procmsg API. */
527 procmsg_msginfo_change_flags(msginfo, MSG_SPAM, 0, ~0, 0);
529 folder_item_remove_msg(msginfo->folder, msginfo->msgnum);
533 if (status < 0 || status > 2) { /* I/O or other errors */
/* Two alternative messages; the condition choosing between them is not
 * shown in this listing. */
537 msg = g_strdup_printf(_("The Bogofilter plugin couldn't filter "
538 "a message. The probable cause of the "
539 "error is that it didn't learn from any mail.\n"
540 "Use \"/Mark/Mark as spam\" and \"/Mark/Mark as "
541 "ham\" to train Bogofilter with a few hundred "
542 "spam and ham messages."));
544 msg = g_strdup_printf(_("The Bogofilter plugin couldn't filter "
545 "a message. the command `%s %s %s` couldn't be run."),
546 bogo_args[0], bogo_args[1], bogo_args[2]);
547 if (!prefs_common.no_recv_err_panel) {
549 alertpanel_error(msg);
553 gchar *tmp = g_strdup_printf("%s\n", msg);
/* On error both result lists are discarded, so the caller receives neither
 * a filtered nor an unfiltered list. */
559 if (status < 0 || status > 2) {
560 g_slist_free(mail_filtering_data->filtered);
561 g_slist_free(mail_filtering_data->unfiltered);
562 mail_filtering_data->filtered = NULL;
563 mail_filtering_data->unfiltered = NULL;
564 } else if (config.receive_spam && new_spams) {
565 FolderItem *save_folder;
/* Use the default trash when no save folder is configured or the configured
 * identifier cannot be resolved. */
567 if ((!config.save_folder) ||
568 (config.save_folder[0] == '\0') ||
569 ((save_folder = folder_find_item_from_identifier(config.save_folder)) == NULL))
570 save_folder = folder_get_default_trash();
572 for (cur = new_spams; cur; cur = cur->next) {
573 msginfo = (MsgInfo *)cur->data;
574 msginfo->is_move = TRUE;
575 msginfo->to_filter_folder = save_folder;
579 g_slist_free(new_hams);
580 g_slist_free(new_spams);
581 g_slist_free(whitelisted_new_spams);
583 if (message_callback != NULL)
584 message_callback(NULL, 0, 0, FALSE);
/* The lists were built with g_slist_prepend(); reversing restores the
 * original message order. */
585 mail_filtering_data->filtered = g_slist_reverse(
586 mail_filtering_data->filtered);
587 mail_filtering_data->unfiltered = g_slist_reverse(
588 mail_filtering_data->unfiltered);
/* Accessor returning a BogofilterConfig pointer; the body is not shown in
 * this listing (presumably the address of the file-level config -- confirm). */
593 BogofilterConfig *bogofilter_get_config(void)
/*
 * Train bogofilter with one message or with a list of messages.
 *
 * msginfo: single message to learn from, or NULL.
 * msglist: list of MsgInfo to learn from, or NULL.
 * spam:    requested classification; it selects "-s" versus "-n" in the
 *          batch path. The conditions that use it in the other paths are
 *          not shown in this listing.
 *
 * Single message: builds a command line with -s, -Sn (the message carries
 * the spam flag and is being corrected) or -n, and runs it through
 * execute_command_line(), logging a non-zero status.
 *
 * List: a first pass records whether any message needs a correction (-Sn)
 * and whether any does not. If both kinds occur, one command is run per
 * message. Otherwise a single bogofilter process is spawned and the file
 * names are written to its stdin, one per line.
 *
 * NOTE(review): the per-message commands place the file name between single
 * quotes in a command string; a path containing a single quote would break
 * that quoting -- confirm how execute_command_line() parses its argument.
 * The return statements are not shown in this listing.
 */
598 int bogofilter_learn(MsgInfo *msginfo, GSList *msglist, gboolean spam)
602 const gchar *bogo_exec = (config.bogopath && *config.bogopath) ? config.bogopath:"bogofilter";
604 if (msginfo == NULL && msglist == NULL) {
609 file = procmsg_get_message_file(msginfo);
613 if (message_callback != NULL)
614 message_callback(_("Bogofilter: learning from message..."), 0, 0, FALSE);
617 cmd = g_strdup_printf("%s -s -I '%s'", bogo_exec, file);
618 else if (MSG_IS_SPAM(msginfo->flags))
619 /* correct bogofilter, this wasn't spam */
620 cmd = g_strdup_printf("%s -Sn -I '%s'", bogo_exec, file);
623 cmd = g_strdup_printf("%s -n -I '%s'", bogo_exec, file);
624 if ((status = execute_command_line(cmd, FALSE)) != 0)
625 log_error(_("Learning failed; `%s` returned with status %d."),
629 if (message_callback != NULL)
630 message_callback(NULL, 0, 0, FALSE);
635 GSList *cur = msglist;
637 int total = g_slist_length(msglist);
639 gboolean some_correction = FALSE, some_no_correction = FALSE;
641 if (message_callback != NULL)
642 message_callback(_("Bogofilter: learning from messages..."), total, 0, FALSE);
/* Survey pass: find out whether the list mixes corrections with plain
 * learning. */
644 for (cur = msglist; cur && status == 0; cur = cur->next) {
645 info = (MsgInfo *)cur->data;
647 some_no_correction = TRUE;
648 else if (MSG_IS_SPAM(info->flags))
649 /* correct bogofilter, this wasn't spam */
650 some_correction = TRUE;
652 some_no_correction = TRUE;
656 if (some_correction && some_no_correction) {
657 /* we potentially have to do different stuff for every mail */
/* Stops at the first command that returns a non-zero status. */
658 for (cur = msglist; cur && status == 0; cur = cur->next) {
659 info = (MsgInfo *)cur->data;
660 file = procmsg_get_message_file(info);
664 cmd = g_strdup_printf("%s -s -I '%s'", bogo_exec, file);
665 else if (MSG_IS_SPAM(info->flags))
666 /* correct bogofilter, this wasn't spam */
667 cmd = g_strdup_printf("%s -Sn -I '%s'", bogo_exec, file);
670 cmd = g_strdup_printf("%s -n -I '%s'", bogo_exec, file);
672 if ((status = execute_command_line(cmd, FALSE)) != 0)
673 log_error(_("Learning failed; `%s` returned with status %d."),
679 if (message_callback != NULL)
680 message_callback(NULL, total, done, FALSE);
/* Homogeneous list: one bogofilter process handles every message. */
682 } else if (some_correction || some_no_correction) {
688 GError *error = NULL;
689 gboolean bogo_forked;
691 bogo_args[0] = (gchar *)bogo_exec;
692 if (some_correction && !some_no_correction)
693 bogo_args[1] = "-Sn";
694 else if (some_no_correction && !some_correction)
695 bogo_args[1] = spam ? "-s":"-n";
699 bogo_forked = g_spawn_async_with_pipes(
700 NULL, bogo_args,NULL, G_SPAWN_SEARCH_PATH|G_SPAWN_DO_NOT_REAP_CHILD,
701 NULL, NULL, &bogo_pid, &bogo_stdin,
/* Feed one file name per line to the child; the statement advancing cur is
 * not shown in this listing. */
704 while (bogo_forked && cur) {
706 info = (MsgInfo *)cur->data;
707 file = procmsg_get_message_file(info);
709 tmp = g_strdup_printf("%s\n",
711 write_all(bogo_stdin, tmp, strlen(tmp));
716 if (message_callback != NULL)
717 message_callback(NULL, total, done, FALSE);
722 waitpid(bogo_pid, &status, 0);
723 if (!WIFEXITED(status))
726 status = WEXITSTATUS(status);
728 if (!bogo_forked || status != 0) {
729 log_error(_("Learning failed; `%s %s %s` returned with error:\n%s"),
730 bogo_args[0], bogo_args[1], bogo_args[2],
731 error ? error->message:_("Unknown error"));
738 if (message_callback != NULL)
739 message_callback(NULL, 0, 0, FALSE);
/*
 * Write the plugin settings into the "Bogofilter" block of the common rc
 * file.
 *
 * Opens <rc dir>/COMMON_RC for writing and selects the block label (the
 * statement taken when either step fails is not shown in this listing).
 * It then writes every entry of param[]. A failed write is logged and the
 * file is closed with revert. Otherwise (the separating lines are not shown)
 * a newline is appended and the file is closed normally.
 */
745 void bogofilter_save_config(void)
750 debug_print("Saving Bogofilter Page\n");
752 rcpath = g_strconcat(get_rc_dir(), G_DIR_SEPARATOR_S, COMMON_RC, NULL);
753 pfile = prefs_write_open(rcpath);
755 if (!pfile || (prefs_set_block_label(pfile, "Bogofilter") < 0))
758 if (prefs_write_param(param, pfile->fp) < 0) {
759 g_warning("Failed to write Bogofilter configuration to file\n");
760 prefs_file_close_revert(pfile);
763 fprintf(pfile->fp, "\n");
765 prefs_file_close(pfile);
/*
 * Install the progress/status reporter used by the filtering and learning
 * code.
 *
 * callback: function stored in message_callback; all call sites in this file
 *           check for NULL before calling, so NULL disables reporting.
 */
768 void bogofilter_set_message_callback(MessageCallback callback)
770 message_callback = callback;
/*
 * Plugin entry point.
 *
 * error: passed to check_plugin_version(), which presumably fills it in on a
 *        version mismatch -- confirm.
 *
 * Checks the host version against 0.9.3.86, loads defaults and then the
 * "Bogofilter" block of the common rc file into config, initialises the GTK
 * part, starts the filtering thread, registers the filtering hook when
 * config.process_emails is set, and registers bogofilter_learn() as the spam
 * learner together with the configured save folder. The return statements
 * and any conditional-compilation guards are not shown in this listing.
 */
773 gint plugin_init(gchar **error)
779 if (!check_plugin_version(MAKE_NUMERIC_VERSION(0, 9, 3, 86),
780 VERSION_NUMERIC, PLUGIN_NAME, error))
/* Defaults first, so that keys missing from the rc file keep a value. */
783 prefs_set_default(param);
784 rcpath = g_strconcat(get_rc_dir(), G_DIR_SEPARATOR_S, COMMON_RC, NULL);
785 prefs_read_config(param, "Bogofilter", rcpath, NULL);
788 bogofilter_gtk_init();
790 debug_print("Bogofilter plugin loaded\n");
793 bogofilter_start_thread();
796 if (config.process_emails) {
797 bogofilter_register_hook();
800 procmsg_register_spam_learner(bogofilter_learn);
801 procmsg_spam_set_folder(config.save_folder);
/*
 * Plugin teardown: unregisters the filtering hook (any guard around that
 * call is not shown in this listing), stops the filtering thread, frees
 * config.save_folder, shuts down the GTK part and removes the spam learner
 * and spam folder registered in plugin_init().
 */
807 void plugin_done(void)
810 bogofilter_unregister_hook();
813 bogofilter_stop_thread();
815 g_free(config.save_folder);
816 bogofilter_gtk_done();
817 procmsg_unregister_spam_learner(bogofilter_learn);
818 procmsg_spam_set_folder(NULL);
819 debug_print("Bogofilter plugin unloaded\n");
/* Returns a constant string; the body is not shown in this listing
 * (presumably PLUGIN_NAME -- confirm). */
822 const gchar *plugin_name(void)
/* Returns the translated, user-visible description of the plugin: what it
 * checks, that Bogofilter must be installed locally, how to train it and
 * where its options are found. */
827 const gchar *plugin_desc(void)
829 return _("This plugin can check all messages that are received from an "
830 "IMAP, LOCAL or POP account for spam using Bogofilter. "
831 "You will need Bogofilter installed locally.\n "
833 "Before Bogofilter can recognize spam messages, you have to "
834 "train it by marking a few hundred spam and ham messages. "
835 "Use \"/Mark/Mark as spam\" and \"/Mark/Mark as ham\" to "
836 "train Bogofilter.\n"
838 "When a message is identified as spam it can be deleted or "
839 "saved in a specially designated folder.\n"
841 "Options can be found in /Configuration/Preferences/Plugins/Bogofilter");
/* Plugin metadata accessors, each returning a constant string. Their bodies
 * are not shown in this listing, so the returned values cannot be stated
 * here. */
844 const gchar *plugin_type(void)
849 const gchar *plugin_licence(void)
854 const gchar *plugin_version(void)
/* Declares the features this plugin offers: two PLUGIN_FILTERING entries
 * (spam detection and spam learning) followed by a PLUGIN_NOTHING entry
 * with a NULL description. The strings are marked with N_() rather than
 * _(). The return statement is not shown in this listing. */
859 struct PluginFeature *plugin_provides(void)
861 static struct PluginFeature features[] =
862 { {PLUGIN_FILTERING, N_("Spam detection")},
863 {PLUGIN_FILTERING, N_("Spam learning")},
864 {PLUGIN_NOTHING, NULL}};
/*
 * Register mail_filtering_hook() on MAIL_LISTFILTERING_HOOKLIST and keep the
 * returned id in hook_id. The conditions around the registration and around
 * the warning are not shown in this listing; presumably the warning and the
 * reset of config.process_emails run only when registration fails --
 * confirm.
 */
868 void bogofilter_register_hook(void)
871 hook_id = hooks_register_hook(MAIL_LISTFILTERING_HOOKLIST, mail_filtering_hook, NULL);
873 g_warning("Failed to register mail filtering hook");
874 config.process_emails = FALSE;
/* Remove the hook identified by hook_id from MAIL_LISTFILTERING_HOOKLIST.
 * Any guard on hook_id and any reset of it afterwards are not shown in this
 * listing. */
878 void bogofilter_unregister_hook(void)
881 hooks_unregister_hook(MAIL_LISTFILTERING_HOOKLIST, hook_id);