2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 1999-2006 Hiroyuki Yamamoto and the Claws Mail Team
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
26 #include <sys/types.h>
31 #include <glib/gi18n.h>
37 #include "common/claws.h"
38 #include "common/version.h"
40 #include "common/utils.h"
45 #include "prefs_gtk.h"
47 #include "bogofilter.h"
50 #include "prefs_common.h"
51 #include "alertpanel.h"
52 #include "addr_compl.h"
54 #ifdef HAVE_SYSEXITS_H
60 #ifdef HAVE_SYS_ERRNO_H
61 #include <sys/errno.h>
66 #ifdef HAVE_SYS_TIME_H
/* Id returned by hooks_register_hook() for the mail filtering hook.
 * NOTE(review): hook_id is unsigned (guint), so the -1 initializer wraps to
 * the largest guint value; any "not registered" test has to compare against
 * (guint)-1 - confirm against the checks in the (un)register functions. */
76 static guint hook_id = -1;
/* Optional progress/status callback; installed through
 * bogofilter_set_message_callback() and always NULL-checked before use. */
77 static MessageCallback message_callback;
/* Plugin settings; the individual fields are bound to the param[] table. */
79 static BogofilterConfig config;
/*
 * Preference table for the "Bogofilter" block of the rc file (it is passed to
 * prefs_set_default(), prefs_read_config() and prefs_write_param() elsewhere
 * in this file). Each entry gives the key name, the default value as a
 * string, the config field that stores the value and the value type.
 * The all-NULL P_OTHER entry terminates the table.
 * NOTE(review): max_size is not referenced by any code visible in this view;
 * its unit is not stated here - confirm.
 */
81 static PrefParam param[] = {
82 {"process_emails", "TRUE", &config.process_emails, P_BOOL,
84 {"receive_spam", "TRUE", &config.receive_spam, P_BOOL,
86 {"save_folder", NULL, &config.save_folder, P_STRING,
88 {"max_size", "250", &config.max_size, P_INT,
90 {"bogopath", "bogofilter", &config.bogopath, P_STRING,
92 {"insert_header", "FALSE", &config.insert_header, P_BOOL,
94 {"whitelist_ab", "TRUE", &config.whitelist_ab, P_BOOL,
97 {NULL, NULL, NULL, P_OTHER, NULL, NULL, NULL}
101 * Helper function for spawn_with_input() - write an entire
/* Try to write to_write bytes from buf to fd. write() may transfer fewer
 * bytes than requested or return -1 on error, so count has to be inspected
 * (the handling code is not visible in this view). */
111 gssize count = write (fd, buf, to_write);
/*
 * Work item describing one filtering run. Only two members are visible in
 * this view; other code in this file also accesses msglist, new_hams,
 * new_spams, done, status, bogo_args and in_thread.
 */
127 typedef struct _BogoFilterData {
/* Hook data whose filtered/unfiltered lists receive the results. */
128 MailFilteringData *mail_filtering_data;
/* Messages classified as spam that were whitelisted; they stay in the
 * unfiltered list but are still flagged as spam by the hook. */
133 GSList *whitelisted_new_spams;
/* Work item currently published for filtering; NULL when there is none. */
139 static BogoFilterData *to_filter_data = NULL;
/* Set to TRUE to make the filtering thread leave its main loop. */
141 static gboolean filter_th_done = FALSE;
/* list_mutex is held while to_filter_data is set up, processed or freed. */
142 static pthread_mutex_t list_mutex = PTHREAD_MUTEX_INITIALIZER;
/* wait_mutex and wait_cond put the filtering thread to sleep and wake it. */
143 static pthread_mutex_t wait_mutex = PTHREAD_MUTEX_INITIALIZER;
144 static pthread_cond_t wait_cond = PTHREAD_COND_INITIALIZER;
/*
 * Check whether an e-mail address is known to the address book.
 *
 * A private copy of `address` is passed through extract_address() and handed
 * to complete_address(); the returned candidates are then compared with it,
 * ignoring case, until one matches.
 * The return statement is not visible in this view; presumably `found` is
 * returned - confirm.
 * NOTE(review): the caller visible in this file brackets its use with
 * start_address_completion()/end_address_completion(); presumably that is
 * required for complete_address() to work - confirm.
 */
147 static gboolean found_in_addressbook(const gchar *address)
150 gboolean found = FALSE;
/* Work on a copy because address is const; extract_address() presumably
 * edits its argument in place - confirm. */
156 addr = g_strdup(address);
157 extract_address(addr);
158 num_addr = complete_address(addr);
160 /* skip first item (this is the search string itself) */
/* The loop ends as soon as found becomes TRUE or the candidates run out. */
162 for (; i < num_addr && !found; i++) {
163 gchar *caddr = get_complete_address(i);
164 extract_address(caddr);
165 if (strcasecmp(caddr, addr) == 0)
/*
 * Classify every message in data->msglist with an external bogofilter
 * process.
 *
 * The process is spawned with the argument vector in data->bogo_args and
 * pipes on its stdin and stdout. For each message the file name is written
 * to the child and one reply is read back; the first character of the second
 * space-separated field of the reply selects the verdict ('S' spam, 'H' ham,
 * anything else unsure).
 *
 * Each MsgInfo is prepended to a pair of lists:
 *   spam              -> mail_filtering_data->filtered   and data->new_spams
 *   whitelisted spam  -> mail_filtering_data->unfiltered and
 *                        data->whitelisted_new_spams
 *   everything else   -> mail_filtering_data->unfiltered and data->new_hams
 * Because entries are prepended, the lists end up in reverse message order.
 *
 * Once the loop is over, address completion is ended again if it was started,
 * the child is collected with waitpid() and its exit status is stored in the
 * work item.
 * NOTE(review): that status is written through the global to_filter_data,
 * not through the data parameter. The calls visible in this file always pass
 * to_filter_data, so both name the same object there - confirm that no other
 * caller exists.
 */
174 static void bogofilter_do_filter(BogoFilterData *data)
177 gint bogo_stdin, bogo_stdout;
178 GError *error = NULL;
179 gboolean bogo_forked;
183 int total = 0, curnum = 0;
187 total = g_slist_length(data->msglist);
/* G_SPAWN_DO_NOT_REAP_CHILD keeps the child available for waitpid(), and
 * G_SPAWN_SEARCH_PATH lets a bare program name be found on PATH. */
189 bogo_forked = g_spawn_async_with_pipes(
190 NULL, data->bogo_args,NULL, G_SPAWN_SEARCH_PATH|G_SPAWN_DO_NOT_REAP_CHILD,
191 NULL, NULL, &bogo_pid, &bogo_stdin,
192 &bogo_stdout, NULL, &error);
/* Spawn failure is logged; what else happens on this path is not visible
 * in this view. */
194 if (bogo_forked == FALSE) {
195 g_warning("%s\n", error ? error->message:"ERROR???");
/* Address completion is only started when whitelisting is enabled. */
201 if (config.whitelist_ab)
202 start_address_completion(NULL);
204 for (cur = data->msglist; cur; cur = cur->next) {
205 gboolean whitelisted = FALSE;
206 msginfo = (MsgInfo *)cur->data;
207 debug_print("Filtering message %d (%d/%d)\n", msginfo->msgnum, curnum, total);
/* Progress report; curnum is advanced here and only when a callback is set. */
209 if (message_callback != NULL)
210 message_callback(NULL, total, curnum++, data->in_thread);
/* Sender lookup for whitelisting; the statement controlled by this test is
 * not visible here (presumably it sets whitelisted) - confirm. */
212 if (config.whitelist_ab && msginfo->from &&
213 found_in_addressbook(msginfo->from))
216 /* can set flags (SCANNED, ATTACHMENT) but that's ok
217 * as GUI updates are hooked not direct */
219 file = procmsg_get_message_file(msginfo);
222 gchar *tmp = g_strdup_printf("%s\n",file);
223 /* send filename to bogofilter */
224 write_all(bogo_stdin, tmp, strlen(tmp));
/* buf is zeroed and at most sizeof(buf)-1 bytes are read, so the reply is
 * always NUL-terminated. */
226 memset(buf, 0, sizeof(buf));
/* NOTE(review): read() < 0 signals an error rather than a short read, and a
 * return value of 0 (end of file) is handled like a normal reply - confirm.
 * On this path the message is recorded as ham. */
228 if (read(bogo_stdout, buf, sizeof(buf)-1) < 0) {
229 g_warning("bogofilter short read\n");
230 debug_print("message %d is ham\n", msginfo->msgnum);
231 data->mail_filtering_data->unfiltered = g_slist_prepend(
232 data->mail_filtering_data->unfiltered, msginfo);
233 data->new_hams = g_slist_prepend(data->new_hams, msginfo);
235 gchar **parts = NULL;
/* When the reply contains a '/', parsing starts after the last one; the
 * reply presumably begins with the file path - confirm. */
236 if (strchr(buf, '/')) {
237 tmp = strrchr(buf, '/')+1;
/* Split the reply on spaces; parts[1] carries the verdict letter. */
241 parts = g_strsplit(tmp, " ", 0);
242 debug_print("read %s\n", buf);
244 /* note the result in the header if needed */
/* The header is only inserted for messages in MH folders, when
 * insert_header is enabled and the reply has at least three fields. */
245 if (parts && parts[0] && parts[1] && parts[2] &&
246 FOLDER_TYPE(msginfo->folder->folder) == F_MH &&
247 config.insert_header) {
248 gchar *tmpfile = get_tmp_file();
249 FILE *input = fopen(file, "r");
250 FILE *output = fopen(tmpfile, "w");
/* Cut parts[2] at its first newline. */
251 if (strstr(parts[2], "\n"))
252 *(strstr(parts[2], "\n")) = '\0';
/* Cases where only one of the two files could be opened; the statements run
 * in these branches are not visible here (presumably they close the file
 * that did open) - confirm. */
253 if (input && !output)
255 else if (!input && output)
258 gchar tmpbuf[BUFFSIZE];
/* Human-readable verdict derived from the first letter of parts[1]. */
259 const gchar *bogosity = *parts[1] == 'S' ? "Spam":
260 (*parts[1] == 'H' ? "Ham":"Unsure");
261 gchar *tmpstr = g_strdup_printf(
262 "X-Claws-Bogosity: %s, spamicity=%s%s\n",
264 whitelisted?" [whitelisted]":"");
/* The new header line is written first, then the original message is copied
 * after it.
 * NOTE(review): fgets() is bounded by sizeof(buf) although it fills tmpbuf;
 * this is only safe if buf is not larger than tmpbuf - confirm, or use
 * sizeof(tmpbuf). */
265 fwrite(tmpstr, 1, strlen(tmpstr), output);
266 while (fgets(tmpbuf, sizeof(buf), input))
267 fputs(tmpbuf, output);
/* The rewritten copy replaces the original message file. */
270 move_file(tmpfile, file, TRUE);
/* Spam verdict for a sender that is not whitelisted. */
277 if (!whitelisted && parts && parts[0] && parts[1] && *parts[1] == 'S') {
279 debug_print("message %d is spam\n", msginfo->msgnum);
280 /* Spam will be filtered away */
281 data->mail_filtering_data->filtered = g_slist_prepend(
282 data->mail_filtering_data->filtered, msginfo);
283 data->new_spams = g_slist_prepend(data->new_spams, msginfo);
285 } else if (whitelisted && parts && parts[0] && parts[1] && *parts[1] == 'S') {
287 debug_print("message %d is whitelisted spam\n", msginfo->msgnum);
288 /* Whitelisted spam will *not* be filtered away, but continue
289 * their trip through filtering as if it was ham. */
290 data->mail_filtering_data->unfiltered = g_slist_prepend(
291 data->mail_filtering_data->unfiltered, msginfo);
292 /* But it gets put in a different list, so that we
293 * can still flag it and inform the user that it is
294 * considered a spam (so that he can teach bogo that
296 data->whitelisted_new_spams = g_slist_prepend(data->whitelisted_new_spams, msginfo);
300 debug_print("message %d is ham\n", msginfo->msgnum);
301 data->mail_filtering_data->unfiltered = g_slist_prepend(
302 data->mail_filtering_data->unfiltered, msginfo);
303 data->new_hams = g_slist_prepend(data->new_hams, msginfo);
310 data->mail_filtering_data->unfiltered = g_slist_prepend(
311 data->mail_filtering_data->unfiltered, msginfo);
312 data->new_hams = g_slist_prepend(data->new_hams, msginfo);
315 if (config.whitelist_ab)
316 end_address_completion();
321 waitpid(bogo_pid, &status, 0);
322 if (!WIFEXITED(status))
325 status = WEXITSTATUS(status);
328 to_filter_data->status = status;
/*
 * Entry point of the background filtering thread.
 *
 * Loops until filter_th_done becomes TRUE. Each pass takes list_mutex and
 * looks at to_filter_data: when there is no work item, or the current one is
 * already marked done, it releases list_mutex and blocks on wait_cond;
 * otherwise it runs bogofilter_do_filter() on the work item and marks it
 * done. The data argument is not used in the code visible here.
 *
 * NOTE(review): done is set to TRUE only after list_mutex has been released,
 * and mail_filtering_hook() polls that flag after it has released list_mutex
 * itself - confirm this unsynchronized hand-over is intended.
 */
332 static void *bogofilter_filtering_thread(void *data)
334 while (!filter_th_done) {
335 pthread_mutex_lock(&list_mutex);
336 if (to_filter_data == NULL || to_filter_data->done == TRUE) {
/* Nothing to do: give up the list lock before going to sleep. */
337 pthread_mutex_unlock(&list_mutex);
338 debug_print("thread is waiting for something to filter\n");
/* NOTE(review): no predicate is re-checked under wait_mutex, so a broadcast
 * sent before this thread reaches pthread_cond_wait() would be missed -
 * confirm. */
339 pthread_mutex_lock(&wait_mutex);
340 pthread_cond_wait(&wait_cond, &wait_mutex);
341 pthread_mutex_unlock(&wait_mutex);
/* Work is pending: process it while list_mutex is still held. */
343 debug_print("thread awaken with something to filter\n");
344 to_filter_data->done = FALSE;
345 bogofilter_do_filter(to_filter_data);
346 pthread_mutex_unlock(&list_mutex);
347 to_filter_data->done = TRUE;
/* Handle of the filtering thread; the rest of this file treats the value 0
 * as "no thread running". */
354 static pthread_t filter_th = 0;
/*
 * Create the background filtering thread running
 * bogofilter_filtering_thread().
 *
 * NOTE(review): the condition "filter_th != 0 || 1" is always true. The
 * statement it guards is not visible in this view; if it is an early return,
 * the thread is never created and filtering always runs synchronously -
 * confirm that this is a deliberate way of disabling the thread.
 */
356 static void bogofilter_start_thread(void)
358 filter_th_done = FALSE;
359 if (filter_th != 0 || 1)
/* pthread_create() returns non-zero on failure; the failure handling is not
 * visible here. */
361 if (pthread_create(&filter_th, 0,
362 bogofilter_filtering_thread,
367 debug_print("thread created\n");
/*
 * Stop the background filtering thread, if one was started.
 *
 * list_mutex is acquired first by polling pthread_mutex_trylock() (the loop
 * body is not visible in this view). The filtering thread holds that mutex
 * while it runs bogofilter_do_filter(), so no filtering run is in progress
 * once the lock has been obtained. When filter_th is set, filter_th_done is
 * raised, the thread is woken through wait_cond and joined. list_mutex is
 * released at the end.
 */
370 static void bogofilter_stop_thread(void)
373 while (pthread_mutex_trylock(&list_mutex) != 0) {
377 if (filter_th != 0) {
378 filter_th_done = TRUE;
379 debug_print("waking thread up\n");
/* Wake the thread so that it notices filter_th_done and leaves its loop. */
380 pthread_mutex_lock(&wait_mutex);
381 pthread_cond_broadcast(&wait_cond);
382 pthread_mutex_unlock(&wait_mutex);
383 pthread_join(filter_th, &res);
386 pthread_mutex_unlock(&list_mutex);
387 debug_print("thread done\n");
/*
 * Callback registered on MAIL_LISTFILTERING_HOOKLIST: classifies a batch of
 * messages with bogofilter.
 *
 * source is a MailFilteringData; its msglist is the batch to classify and
 * its filtered/unfiltered lists receive the outcome. The data argument is
 * not used in the code visible here.
 *
 * Steps:
 *  1. Fetch every message file once, so that the bodies are cached.
 *  2. Publish a BogoFilterData work item in to_filter_data and either wake
 *     the filtering thread and wait for it, or run bogofilter_do_filter()
 *     directly.
 *  3. Update the MSG_SPAM flag of the messages from the three result lists.
 *  4. If the exit status is below 0 or above 2, report the error and drop
 *     both result lists; otherwise, when receive_spam is set, mark the spam
 *     messages to be moved to the spam folder.
 *  5. Free the temporary lists and reverse filtered/unfiltered, which were
 *     built by prepending.
 *
 * The return statements are not visible in this view.
 */
391 static gboolean mail_filtering_hook(gpointer source, gpointer data)
393 MailFilteringData *mail_filtering_data = (MailFilteringData *) source;
394 MsgInfo *msginfo = mail_filtering_data->msginfo;
395 GSList *msglist = mail_filtering_data->msglist;
/* Static, so its value persists across calls; how it is used is not visible
 * in this view. */
397 static gboolean warned_error = FALSE;
399 int total = 0, curnum = 0;
400 GSList *new_hams = NULL, *new_spams = NULL, *whitelisted_new_spams = NULL;
/* Falls back to the bare name "bogofilter" when no path is configured. */
401 gchar *bogo_exec = (config.bogopath && *config.bogopath) ? config.bogopath:"bogofilter";
403 gboolean ok_to_thread = TRUE;
405 bogo_args[0] = bogo_exec;
410 if (!config.process_emails) {
/* The hook works on a message list; a lone msginfo without a list is
 * reported as a wrong call. */
414 if (msglist == NULL && msginfo != NULL) {
415 g_warning("wrong call to bogofilter mail_filtering_hook");
419 total = g_slist_length(msglist);
421 /* we have to make sure the mails are cached - or it'll break on IMAP */
422 if (message_callback != NULL)
423 message_callback(_("Bogofilter: fetching bodies..."), total, 0, FALSE);
424 for (cur = msglist; cur; cur = cur->next) {
425 gchar *file = procmsg_get_message_file((MsgInfo *)cur->data);
/* The test that leads to disabling threaded filtering is not visible here. */
427 ok_to_thread = FALSE;
428 if (message_callback != NULL)
429 message_callback(NULL, total, curnum++, FALSE);
/* Reset the progress display, then announce the filtering phase. */
432 if (message_callback != NULL)
433 message_callback(NULL, 0, 0, FALSE);
435 if (message_callback != NULL)
436 message_callback(_("Bogofilter: filtering messages..."), total, 0, FALSE);
/* Take list_mutex by polling; the loop body is not visible here. */
439 while (pthread_mutex_trylock(&list_mutex) != 0) {
/* Build the work item; g_new0() already zero-fills it, the explicit
 * assignments spell out the initial state. */
444 to_filter_data = g_new0(BogoFilterData, 1);
445 to_filter_data->msglist = msglist;
446 to_filter_data->mail_filtering_data = mail_filtering_data;
447 to_filter_data->new_hams = NULL;
448 to_filter_data->new_spams = NULL;
449 to_filter_data->whitelisted_new_spams = NULL;
450 to_filter_data->done = FALSE;
451 to_filter_data->status = -1;
/* The work item borrows bogo_args; it is not copied. */
452 to_filter_data->bogo_args = bogo_args;
/* Two alternative assignments; presumably they sit in different
 * preprocessor branches whose directives are not visible here - confirm. */
454 to_filter_data->in_thread = (filter_th != 0 && ok_to_thread);
456 to_filter_data->in_thread = FALSE;
460 pthread_mutex_unlock(&list_mutex);
/* Threaded path: wake the worker, then wait until it marks the item done. */
462 if (filter_th != 0 && ok_to_thread) {
463 debug_print("waking thread to let it filter things\n");
464 pthread_mutex_lock(&wait_mutex);
465 pthread_cond_broadcast(&wait_cond);
466 pthread_mutex_unlock(&wait_mutex);
/* Polls the done flag; the loop body is not visible here. */
468 while (!to_filter_data->done) {
474 while (pthread_mutex_trylock(&list_mutex) != 0) {
/* Synchronous path: no worker thread, or threading was ruled out above. */
479 if (filter_th == 0 || !ok_to_thread)
480 bogofilter_do_filter(to_filter_data);
/* Second direct call; presumably the variant for a build without thread
 * support - confirm. */
482 bogofilter_do_filter(to_filter_data);
/* Copy the results out of the work item before it is freed. */
485 new_hams = to_filter_data->new_hams;
486 new_spams = to_filter_data->new_spams;
487 whitelisted_new_spams = to_filter_data->whitelisted_new_spams;
488 status = to_filter_data->status;
489 g_free(to_filter_data);
490 to_filter_data = NULL;
492 pthread_mutex_unlock(&list_mutex);
/* Messages classified as ham lose the MSG_SPAM flag. */
497 for (cur = new_hams; cur; cur = cur->next) {
498 MsgInfo *msginfo = (MsgInfo *)cur->data;
499 procmsg_msginfo_unset_flags(msginfo, MSG_SPAM, 0);
501 /* flag whitelisted spams */
502 for (cur = whitelisted_new_spams; cur; cur = cur->next) {
503 MsgInfo *msginfo = (MsgInfo *)cur->data;
504 procmsg_msginfo_set_flags(msginfo, MSG_SPAM, 0);
506 /* flag spams and delete them if !config.receive_spam
507 * (if config.receive_spam is set, we'll move them later) */
508 for (cur = new_spams; cur; cur = cur->next) {
509 MsgInfo *msginfo = (MsgInfo *)cur->data;
510 if (config.receive_spam) {
/* NOTE(review): presumably adds MSG_SPAM while removing every other
 * permanent flag (~0) - confirm against procmsg_msginfo_change_flags(). */
511 procmsg_msginfo_change_flags(msginfo, MSG_SPAM, 0, ~0, 0);
513 folder_item_remove_msg(msginfo->folder, msginfo->msgnum);
517 if (status < 0 || status > 2) { /* I/O or other errors */
/* Two alternative messages follow; the condition choosing between them is
 * not visible in this view. */
521 msg = g_strdup_printf(_("The Bogofilter plugin couldn't filter "
522 "a message. The probable cause of the "
523 "error is that it didn't learn from any mail.\n"
524 "Use \"/Mark/Mark as spam\" and \"/Mark/Mark as "
525 "ham\" to train Bogofilter with a few hundred "
526 "spam and ham messages."));
528 msg = g_strdup_printf(_("The Bogofilter plugin couldn't filter "
529 "a message. the command `%s %s %s` couldn't be run."),
530 bogo_args[0], bogo_args[1], bogo_args[2]);
/* The error panel is only shown when the user has not disabled it. */
531 if (!prefs_common.no_recv_err_panel) {
533 alertpanel_error(msg);
537 gchar *tmp = g_strdup_printf("%s\n", msg);
/* On error both result lists are dropped, so the hook reports no
 * classification for any message. */
543 if (status < 0 || status > 2) {
544 g_slist_free(mail_filtering_data->filtered);
545 g_slist_free(mail_filtering_data->unfiltered);
546 mail_filtering_data->filtered = NULL;
547 mail_filtering_data->unfiltered = NULL;
548 } else if (config.receive_spam && new_spams) {
549 FolderItem *save_folder;
/* Use the configured spam folder; fall back to the default trash folder
 * when none is configured or it cannot be found. */
551 if ((!config.save_folder) ||
552 (config.save_folder[0] == '\0') ||
553 ((save_folder = folder_find_item_from_identifier(config.save_folder)) == NULL))
554 save_folder = folder_get_default_trash();
/* Tag each spam message so that it gets moved to save_folder. */
556 for (cur = new_spams; cur; cur = cur->next) {
557 msginfo = (MsgInfo *)cur->data;
558 msginfo->is_move = TRUE;
559 msginfo->to_filter_folder = save_folder;
/* Only the list cells are freed; the MsgInfo objects are not. */
563 g_slist_free(new_hams);
564 g_slist_free(new_spams);
565 g_slist_free(whitelisted_new_spams);
567 if (message_callback != NULL)
568 message_callback(NULL, 0, 0, FALSE);
/* Results were prepended during filtering; reversing restores the order of
 * the incoming list. */
569 mail_filtering_data->filtered = g_slist_reverse(
570 mail_filtering_data->filtered);
571 mail_filtering_data->unfiltered = g_slist_reverse(
572 mail_filtering_data->unfiltered);
/* Accessor for the plugin configuration. The body is not visible in this
 * view; presumably it returns the address of the file-scope config -
 * confirm. */
577 BogofilterConfig *bogofilter_get_config(void)
/*
 * Train bogofilter with a single message (msginfo) or a list of messages
 * (msglist), as spam when spam is TRUE and as ham otherwise.
 *
 * bogofilter options used:
 *   -s   learn as spam
 *   -n   learn as ham
 *   -Sn  correction: the message is currently flagged as spam and is now
 *        learnt as ham
 *
 * Single message: one shell command line is built and executed.
 * Message list: the list is scanned first to find out whether corrections,
 * plain learning, or both are needed. A mixed list is handled with one
 * command per message. A uniform list is handled by spawning one bogofilter
 * process and writing a line per message to its stdin.
 *
 * NOTE(review): the command lines place the file name between single
 * quotes; a file name containing a single quote would break that quoting -
 * confirm how execute_command_line() parses its argument.
 * The return statements are not visible in this view.
 */
582 int bogofilter_learn(MsgInfo *msginfo, GSList *msglist, gboolean spam)
/* Falls back to the bare name "bogofilter" when no path is configured. */
586 const gchar *bogo_exec = (config.bogopath && *config.bogopath) ? config.bogopath:"bogofilter";
588 if (msginfo == NULL && msglist == NULL) {
/* Single-message mode. */
593 file = procmsg_get_message_file(msginfo);
597 if (message_callback != NULL)
598 message_callback(_("Bogofilter: learning from message..."), 0, 0, FALSE);
601 cmd = g_strdup_printf("%s -s -I '%s'", bogo_exec, file);
602 else if (MSG_IS_SPAM(msginfo->flags))
603 /* correct bogofilter, this wasn't spam */
604 cmd = g_strdup_printf("%s -Sn -I '%s'", bogo_exec, file);
607 cmd = g_strdup_printf("%s -n -I '%s'", bogo_exec, file);
/* A non-zero status from the command counts as a failure and is logged. */
608 if ((status = execute_command_line(cmd, FALSE)) != 0)
609 log_error(_("Learning failed; `%s` returned with status %d."),
613 if (message_callback != NULL)
614 message_callback(NULL, 0, 0, FALSE);
/* List mode. */
619 GSList *cur = msglist;
621 int total = g_slist_length(msglist);
623 gboolean some_correction = FALSE, some_no_correction = FALSE;
625 if (message_callback != NULL)
626 message_callback(_("Bogofilter: learning from messages..."), total, 0, FALSE);
/* First pass: find out which kinds of learning the list requires. */
628 for (cur = msglist; cur && status == 0; cur = cur->next) {
629 info = (MsgInfo *)cur->data;
631 some_no_correction = TRUE;
632 else if (MSG_IS_SPAM(info->flags))
633 /* correct bogofilter, this wasn't spam */
634 some_correction = TRUE;
636 some_no_correction = TRUE;
640 if (some_correction && some_no_correction) {
641 /* we potentially have to do different stuff for every mail */
/* The loop stops at the first command that returns a non-zero status. */
642 for (cur = msglist; cur && status == 0; cur = cur->next) {
643 info = (MsgInfo *)cur->data;
644 file = procmsg_get_message_file(info);
648 cmd = g_strdup_printf("%s -s -I '%s'", bogo_exec, file);
649 else if (MSG_IS_SPAM(info->flags))
650 /* correct bogofilter, this wasn't spam */
651 cmd = g_strdup_printf("%s -Sn -I '%s'", bogo_exec, file);
654 cmd = g_strdup_printf("%s -n -I '%s'", bogo_exec, file);
656 if ((status = execute_command_line(cmd, FALSE)) != 0)
657 log_error(_("Learning failed; `%s` returned with status %d."),
663 if (message_callback != NULL)
664 message_callback(NULL, total, done, FALSE);
/* Uniform list: every message takes the same option, so a single process
 * can handle all of them. */
666 } else if (some_correction || some_no_correction) {
672 GError *error = NULL;
673 gboolean bogo_forked;
675 bogo_args[0] = (gchar *)bogo_exec;
676 if (some_correction && !some_no_correction)
677 bogo_args[1] = "-Sn";
678 else if (some_no_correction && !some_correction)
679 bogo_args[1] = spam ? "-s":"-n";
/* G_SPAWN_DO_NOT_REAP_CHILD keeps the child available for waitpid(). */
683 bogo_forked = g_spawn_async_with_pipes(
684 NULL, bogo_args,NULL, G_SPAWN_SEARCH_PATH|G_SPAWN_DO_NOT_REAP_CHILD,
685 NULL, NULL, &bogo_pid, &bogo_stdin,
/* Write one newline-terminated line per message to the child's stdin; the
 * argument of the format call is not visible here (presumably the message
 * file name) - confirm. */
688 while (bogo_forked && cur) {
690 info = (MsgInfo *)cur->data;
691 file = procmsg_get_message_file(info);
693 tmp = g_strdup_printf("%s\n",
695 write_all(bogo_stdin, tmp, strlen(tmp));
700 if (message_callback != NULL)
701 message_callback(NULL, total, done, FALSE);
/* Collect the child and reduce the wait status to its exit code. */
706 waitpid(bogo_pid, &status, 0);
707 if (!WIFEXITED(status))
710 status = WEXITSTATUS(status);
/* When the spawn worked but the exit code is non-zero, error is NULL and
 * the logged text falls back to "Unknown error". */
712 if (!bogo_forked || status != 0) {
713 log_error(_("Learning failed; `%s %s %s` returned with error:\n%s"),
714 bogo_args[0], bogo_args[1], bogo_args[2],
715 error ? error->message:_("Unknown error"));
722 if (message_callback != NULL)
723 message_callback(NULL, 0, 0, FALSE);
/*
 * Write the plugin settings (the param[] table) into the "Bogofilter" block
 * of the rc file COMMON_RC located in get_rc_dir().
 *
 * If the parameters cannot be written, a warning is logged and the pending
 * file is reverted with prefs_file_close_revert(). The statements that
 * follow the open/label check and the revert are not visible in this view
 * (presumably early returns) - confirm.
 */
729 void bogofilter_save_config(void)
734 debug_print("Saving Bogofilter Page\n");
736 rcpath = g_strconcat(get_rc_dir(), G_DIR_SEPARATOR_S, COMMON_RC, NULL);
737 pfile = prefs_write_open(rcpath);
/* Covers both a failed open and a failure to select the block. */
739 if (!pfile || (prefs_set_block_label(pfile, "Bogofilter") < 0))
742 if (prefs_write_param(param, pfile->fp) < 0) {
743 g_warning("Failed to write Bogofilter configuration to file\n");
744 prefs_file_close_revert(pfile);
/* A newline is written after the parameters before the file is closed. */
747 fprintf(pfile->fp, "\n");
749 prefs_file_close(pfile);
/* Install the progress/status callback used during filtering and learning.
 * Every use in this file is NULL-checked, so NULL disables reporting. */
752 void bogofilter_set_message_callback(MessageCallback callback)
754 message_callback = callback;
/*
 * Plugin initialisation.
 *
 * Rejects a Claws Mail whose version is newer than VERSION_NUMERIC or older
 * than 0.9.3.86, storing a newly allocated, translated message in *error.
 * Otherwise it loads the "Bogofilter" preferences over their defaults,
 * initialises the GTK part, starts the filtering thread, registers the
 * filtering hook when process_emails is enabled, and registers the spam
 * learner and the spam folder.
 * The return statements are not visible in this view.
 */
757 gint plugin_init(gchar **error)
763 if ((claws_get_version() > VERSION_NUMERIC)) {
764 *error = g_strdup(_("Your version of Claws Mail is newer than the version the Bogofilter plugin was built with"));
768 if ((claws_get_version() < MAKE_NUMERIC_VERSION(0, 9, 3, 86))) {
769 *error = g_strdup(_("Your version of Claws Mail is too old for the Bogofilter plugin"));
/* Defaults are applied first, then overridden by the values in the rc file. */
773 prefs_set_default(param);
774 rcpath = g_strconcat(get_rc_dir(), G_DIR_SEPARATOR_S, COMMON_RC, NULL);
775 prefs_read_config(param, "Bogofilter", rcpath, NULL);
778 bogofilter_gtk_init();
780 debug_print("Bogofilter plugin loaded\n");
783 bogofilter_start_thread();
/* The hook is only registered when mail processing is enabled. */
786 if (config.process_emails) {
787 bogofilter_register_hook();
790 procmsg_register_spam_learner(bogofilter_learn);
791 procmsg_spam_set_folder(config.save_folder);
/*
 * Plugin shutdown: unregisters the filtering hook, stops the filtering
 * thread, frees the save_folder setting, shuts down the GTK part and
 * unregisters the spam learner and the spam folder.
 * Any guards around these calls are not visible in this view.
 */
797 void plugin_done(void)
800 bogofilter_unregister_hook();
803 bogofilter_stop_thread();
/* The string is freed here; the spam folder is reset to NULL further down. */
805 g_free(config.save_folder);
806 bogofilter_gtk_done();
807 procmsg_unregister_spam_learner(bogofilter_learn);
808 procmsg_spam_set_folder(NULL);
809 debug_print("Bogofilter plugin unloaded\n");
/* Return the translated display name of the plugin. */
812 const gchar *plugin_name(void)
814 return _("Bogofilter");
/*
 * Return the translated long description of the plugin: what it does, how
 * to train Bogofilter and where its options are found.
 * NOTE(review): the third literal ends in a newline followed by a space;
 * the space is part of the translatable string - confirm it is intended.
 */
817 const gchar *plugin_desc(void)
819 return _("This plugin can check all messages that are received from an "
820 "IMAP, LOCAL or POP account for spam using Bogofilter. "
821 "You will need Bogofilter installed locally.\n "
823 "Before Bogofilter can recognize spam messages, you have to "
824 "train it by marking a few hundred spam and ham messages. "
825 "Use \"/Mark/Mark as spam\" and \"/Mark/Mark as ham\" to "
826 "train Bogofilter.\n"
828 "When a message is identified as spam it can be deleted or "
829 "saved in a specially designated folder.\n"
831 "Options can be found in /Configuration/Preferences/Plugins/Bogofilter");
/* Plugin type string accessor. The body is not visible in this view. */
834 const gchar *plugin_type(void)
/* Plugin licence string accessor. The body is not visible in this view. */
839 const gchar *plugin_licence(void)
/* Plugin version string accessor. The body is not visible in this view. */
844 const gchar *plugin_version(void)
/*
 * Describe the features this plugin offers. The table is static and holds
 * two PLUGIN_FILTERING entries followed by a PLUGIN_NOTHING terminator.
 * The labels are marked with N_() here rather than _().
 * The return statement is not visible in this view; presumably the table is
 * returned - confirm.
 */
849 struct PluginFeature *plugin_provides(void)
851 static struct PluginFeature features[] =
852 { {PLUGIN_FILTERING, N_("Spam detection")},
853 {PLUGIN_FILTERING, N_("Spam learning")},
854 {PLUGIN_NOTHING, NULL}};
/*
 * Register mail_filtering_hook() on MAIL_LISTFILTERING_HOOKLIST and store
 * the returned id in hook_id. On failure a warning is logged and mail
 * processing is switched off in the configuration; the failure test itself
 * and any guard before the registration are not visible in this view.
 */
858 void bogofilter_register_hook(void)
861 hook_id = hooks_register_hook(MAIL_LISTFILTERING_HOOKLIST, mail_filtering_hook, NULL);
863 g_warning("Failed to register mail filtering hook");
864 config.process_emails = FALSE;
/* Remove the hook identified by hook_id from MAIL_LISTFILTERING_HOOKLIST.
 * Any guard on hook_id and any reset of it are not visible in this view. */
868 void bogofilter_unregister_hook(void)
871 hooks_unregister_hook(MAIL_LISTFILTERING_HOOKLIST, hook_id);