2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 2002-2012 Match Grun and the Claws Mail team
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
21 * Functions for an E-Mail address harvester.
29 #include "proctypes.h"
32 #include "addrharvest.h"
35 #ifdef USE_ALT_ADDRBOOK
36 #include "addressbook-dbus.h"
39 /* Mail header names of interest */
/* addrharvest_create() registers one header-table entry per name below.
 * The HEADER_* macros are not defined in the lines shown here. */
40 static gchar *_headerFrom_ = HEADER_FROM;
41 static gchar *_headerReplyTo_ = HEADER_REPLY_TO;
42 static gchar *_headerSender_ = HEADER_SENDER;
43 static gchar *_headerErrorsTo_ = HEADER_ERRORS_TO;
44 static gchar *_headerCC_ = HEADER_CC;
45 static gchar *_headerTo_ = HEADER_TO;
/* Sizes the per-address scratch buffer in addrharvest_parse_address(). */
47 #define ADDR_BUFFSIZE 1024
/* Sizes the line buffer filled by fgets() in addrharvest_get_header(). */
48 #define MSG_BUFFSIZE 2048
/* NOTE(review): no use of MSGNUM_BUFFSIZE is visible in this listing. */
49 #define MSGNUM_BUFFSIZE 32
/* Initial harvester->folderSize assigned by addrharvest_create(). */
50 #define DFL_FOLDER_SIZE 20
52 /* Noise strings included by some other E-Mail clients */
/* Both are removed from display names through addrharvest_del_email(). */
53 #define REM_NAME_STRING "(Email)"
54 #define REM_NAME_STRING2 "(Email 2)"
56 /* Directories to ignore */
/* Tab-separated names; addrharvest_harvest_dir() checks a directory name
 * against this string with strstr(). */
57 #define DIR_IGNORE ".\t..\t.sylpheed_mark\t.sylpheed_claws_cache"
69 #ifdef USE_ALT_ADDRBOOK
78 * Build header table entry.
79 * Enter: harvester Harvester object.
/*
 * Allocate a zero-filled HeaderEntry, mark it unselected and append it to
 * harvester->headerTable.
 * NOTE(review): further field assignments may sit on lines missing from
 * this listing.
 */
82 static void addrharvest_build_entry(
83 AddressHarvester* harvester, gchar *name )
87 entry = g_new0( HeaderEntry, 1 );
89 entry->selected = FALSE;
92 harvester->headerTable = g_list_append( harvester->headerTable, entry );
/*
 * Visitor that addrharvest_free_table() hands to
 * g_hash_table_foreach_remove() for harvester->dupTable.
 * NOTE(review): the body is not part of this listing; confirm what it
 * releases and that it returns non-zero so entries are removed.
 */
98 static gint addrharvest_free_table_vis( gpointer key, gpointer value, gpointer data ) {
/*
 * Tear down both lookup structures owned by the harvester: the header list
 * and the duplicate-address hash table. Both pointers are left NULL.
 */
108 static void addrharvest_free_table( AddressHarvester* harvester ) {
112 /* Free header list */
113 node = harvester->headerTable;
/* Reset each entry before the list itself is freed. */
115 entry = ( HeaderEntry * ) node->data;
116 entry->header = NULL;
117 entry->selected = FALSE;
118 entry->folder = NULL;
121 node = g_list_next( node );
/* g_list_free() releases only the list cells, not the element data. */
123 g_list_free( harvester->headerTable );
124 harvester->headerTable = NULL;
126 /* Free duplicate table */
/* The table is created with g_hash_table_new() (no destroy notifiers), so
 * entries are emptied through the visitor before the table is destroyed. */
127 g_hash_table_foreach_remove( harvester->dupTable, addrharvest_free_table_vis, NULL );
128 g_hash_table_destroy( harvester->dupTable );
129 harvester->dupTable = NULL;
/*
 * Create a harvester with default settings: no path, an empty string-keyed
 * duplicate table, folder size DFL_FOLDER_SIZE, status MGU_SUCCESS, and one
 * (unselected) header-table entry for each header name of interest.
 * addrharvest_free() is the matching teardown.
 * NOTE(review): the return statement is on a line not shown; presumably the
 * new harvester is returned.
 */
136 AddressHarvester *addrharvest_create( void ) {
137 AddressHarvester *harvester;
139 harvester = g_new0( AddressHarvester, 1 );
140 harvester->path = NULL;
/* Keys are hashed and compared as C strings. */
141 harvester->dupTable = g_hash_table_new( g_str_hash, g_str_equal );
142 harvester->folderSize = DFL_FOLDER_SIZE;
143 harvester->retVal = MGU_SUCCESS;
145 /* Build header table */
146 harvester->headerTable = NULL;
147 addrharvest_build_entry( harvester, _headerFrom_ );
148 addrharvest_build_entry( harvester, _headerReplyTo_ );
149 addrharvest_build_entry( harvester, _headerSender_ );
150 addrharvest_build_entry( harvester, _headerErrorsTo_ );
151 addrharvest_build_entry( harvester, _headerCC_ );
152 addrharvest_build_entry( harvester, _headerTo_ );
161 * Specify path to folder that will be harvested.
162 * Entry: harvester Harvester object.
163 * value Full directory path.
/*
 * Replace harvester->path with `value` and strip leading and trailing
 * whitespace from the stored string in place. A NULL harvester is rejected.
 * NOTE(review): g_strstrip() expects a non-NULL string; confirm that
 * mgu_replace_string() cannot hand back NULL here (e.g. for a NULL value).
 */
165 void addrharvest_set_path( AddressHarvester* harvester, const gchar *value ) {
166 cm_return_if_fail( harvester != NULL );
167 harvester->path = mgu_replace_string( harvester->path, value );
168 g_strstrip( harvester->path );
172 * Specify maximum folder size.
173 * Entry: harvester Harvester object.
/*
 * Store `value` as harvester->folderSize. A NULL harvester is rejected.
 * NOTE(review): addrharvest_insert_cache() uses folderSize as a divisor
 * (count % folderSize and count / folderSize), so zero must never be stored.
 * Confirm that a range check exists on the lines missing from this listing.
 */
176 void addrharvest_set_folder_size(
177 AddressHarvester* harvester, const gint value )
179 cm_return_if_fail( harvester != NULL );
181 harvester->folderSize = value;
186 * Specify folder recursion.
187 * Entry: harvester Harvester object.
188 * value TRUE to process sub-folders, FALSE to process folder only.
/*
 * Store the recursion flag. addrharvest_harvest_dir() reads
 * harvester->folderRecurse before descending into a sub-directory.
 * A NULL harvester is rejected.
 */
190 void addrharvest_set_recurse(
191 AddressHarvester* harvester, const gboolean value )
193 cm_return_if_fail( harvester != NULL );
194 harvester->folderRecurse = value;
198 * Search (case insensitive) for header entry with specified name.
199 * Enter: harvester Harvester.
201 * Return: Header, or NULL if not found.
/*
 * Scan harvester->headerTable from its head for an entry matching `name`.
 */
203 static HeaderEntry *addrharvest_find(
204 AddressHarvester* harvester, const gchar *name ) {
209 node = harvester->headerTable;
/* Only strlen(entry->header) characters are compared (ASCII
 * case-insensitive), so `name` matches as soon as it begins with the header
 * name; anything following that prefix is ignored. */
214 if (g_ascii_strncasecmp(entry->header, name,
215 strlen(entry->header)) == 0 ) {
219 node = g_list_next( node );
225 * Set selection for specified header.
226 * Enter: harvester Harvester.
228 * value Value to set.
/*
 * Set the `selected` flag of the header entry found by addrharvest_find().
 * Nothing happens when no entry matches `name`; a NULL harvester is rejected.
 */
230 void addrharvest_set_header(
231 AddressHarvester* harvester, const gchar *name, const gboolean value )
235 cm_return_if_fail( harvester != NULL );
236 entry = addrharvest_find( harvester, name );
237 if( entry != NULL ) {
238 entry->selected = value;
244 * Enter: harvester Harvester.
246 * Return: Address count, or -1 if header not found.
/*
 * Return entry->count for the header entry found by addrharvest_find().
 * When the harvester is NULL or no entry matches, `count` keeps its initial
 * value. NOTE(review): that initial value is declared on a line not shown;
 * the header comment states -1.
 */
248 gint addrharvest_get_count( AddressHarvester* harvester, const gchar *name ) {
253 cm_return_val_if_fail( harvester != NULL, count );
254 entry = addrharvest_find( harvester, name );
255 if( entry != NULL ) {
256 count = entry->count;
262 * Free up object by releasing internal memory.
263 * Enter: harvester Harvester.
/*
 * Release everything the harvester owns (header list, duplicate table,
 * path) and reset its scalar fields. A NULL harvester is rejected.
 * NOTE(review): the statement that frees the harvester object itself is on
 * a line missing from this listing.
 */
265 void addrharvest_free( AddressHarvester *harvester ) {
266 cm_return_if_fail( harvester != NULL );
268 /* Free internal stuff */
269 addrharvest_free_table( harvester );
270 g_free( harvester->path );
/* Clear fields so no stale pointers remain before the object is released. */
273 harvester->path = NULL;
274 harvester->retVal = MGU_SUCCESS;
275 harvester->headerTable = NULL;
277 harvester->folderSize = 0;
279 /* Now release object */
283 #ifdef USE_ALT_ADDRBOOK
/*
 * Pick one part of `name` as selected by `namepart` (FIRST is the only
 * value tested in the visible lines). Works on a private copy, `token`;
 * `pos` is the last space inside that copy, or NULL when there is none.
 * Every visible assignment to `part` is a fresh g_strdup() result.
 * NOTE(review): the remaining branch conditions, the release of `token`
 * and the return statement are on lines missing from this listing.
 */
284 static gchar* get_namepart(const gchar* name, Namepart namepart) {
285 gchar *pos, *part = NULL;
286 gchar *token = g_strdup(name);
288 pos = g_strrstr(token, " ");
289 if (namepart == FIRST) {
292 part = g_strdup(token);
298 part = g_strdup(token);
301 part = g_strdup(pos);
310 * Insert address into cache.
311 * Enter: harvester Harvester object.
312 * entry Header object.
313 * cache Address cache to load.
315 * address eMail address.
/*
 * Record one harvested (name, address) pair, using harvester->dupTable
 * (keyed by the lower-cased address) to detect addresses already seen.
 * Without USE_ALT_ADDRBOOK the contact is added to `cache` inside a
 * per-header folder; with it, a ContactEntry is built instead.
 * NOTE(review): several branch lines are missing from this listing, so the
 * exact nesting of the statements below should be checked against the file.
 */
317 static void addrharvest_insert_cache(
318 AddressHarvester *harvester, HeaderEntry *entry,
319 AddressCache *cache, const gchar *name,
320 const gchar *address )
322 #ifndef USE_ALT_ADDRBOOK
/* Decide whether a new folder is needed for this header. */
331 folder = entry->folder;
332 if( folder == NULL ) {
333 newFolder = TRUE; /* No folder yet */
/* folderSize is a divisor here and below; it must be non-zero. */
335 if( entry->count % harvester->folderSize == 0 ) {
336 newFolder = TRUE; /* Folder is full */
339 ContactEntry* person;
/* Lower-cased copy of the address is the duplicate-detection key. */
344 key = g_utf8_strdown( address, -1 );
345 person = g_hash_table_lookup( harvester->dupTable, key );
346 #ifndef USE_ALT_ADDRBOOK
348 /* Update existing person to use longest name */
349 value = ADDRITEM_NAME(person);
350 if( strlen( name ) > strlen( value ) ) {
351 addritem_person_set_common_name( person, name );
356 /* Folder if required */
/* Folder sequence number is 1-based; the folder is named
 * "<header> (<number>)". */
358 cnt = 1 + ( entry->count / harvester->folderSize );
359 folderName =g_strdup_printf( "%s (%d)",
360 entry->header, cnt );
361 folder = addritem_create_item_folder();
362 addritem_folder_set_name( folder, folderName );
363 addritem_folder_set_remarks( folder, "" );
364 addrcache_id_folder( cache, folder );
365 addrcache_add_folder( cache, folder );
366 entry->folder = folder;
367 g_free( folderName );
/* New address: add the contact and remember it under its key. The table
 * stores the `key` pointer itself rather than a copy. */
371 person = addrcache_add_contact(
372 cache, folder, name, address, "" );
373 g_hash_table_insert( harvester->dupTable, key, person );
376 addritem_parse_first_last( person );
/* Alternate address book: build a ContactEntry with the name split by
 * get_namepart() and a copy of the address. */
379 person = g_new0(ContactEntry, 1);
380 person->first_name = get_namepart(name, FIRST);
381 person->last_name = get_namepart(name, LAST);
382 person->email = g_strdup(address);
383 g_hash_table_insert(harvester->dupTable, key, person);
390 * Remove specified string from name.
392 * str String to remove.
/*
 * Delete occurrences of `str` from `name` in place, matching without regard
 * to case. Each hit is closed up by moving the text after it down over the
 * match; the loop repeats until strcasestr() finds nothing more.
 * strcasestr() is a GNU/BSD extension, not ISO C.
 * NOTE(review): `lenn` (the byte count moved) is computed on a line missing
 * from this listing; confirm it covers the terminating NUL.
 */
394 static void addrharvest_del_email( gchar *name, gchar *str ) {
398 lenr = strlen( str );
399 while((p = strcasestr( name, str )) != NULL) {
401 memmove( p, p + lenr, lenn );
406 * Find position of at (@) character in buffer.
407 * Enter: buffer Start of buffer.
408 * Return: Position of at character, or NULL if not found.
409 * Note: This function searches for the last occurrence of an 'at' character
410 * prior to a valid delimiter character for the end of address. This enables
411 * an address to be found where it is also used as the name of the
412 * recipient. For example:
413 * "axle.rose@netscape.com" <axle.rose@netscape.com>
414 * The last occurrence of the at character is detected.
/*
 * Locate an at-sign in `buffer`, starting from the first one found by
 * strchr().
 * NOTE(review): the forward search that prefers a later at-sign, and the
 * return statement, are on lines missing from this listing.
 */
416 static gchar *addrharvest_find_at( const gchar *buffer ) {
420 atCh = strchr( buffer, '@' );
422 /* Search forward for another one */
445 * Find start and end of address string.
446 * Enter: buf Start address of buffer to process (not modified).
447 * atp Pointer to email at (@) character.
448 * bp Pointer to start of email address (returned).
449 * ep Pointer to end of email address (returned).
/*
 * Report the bounds of the address text through *bp (and the end pointer
 * named in the header comment).
 * NOTE(review): the loop headers and the final parameter are on lines
 * missing from this listing.
 */
451 static void addrharvest_find_address(
452 const gchar *buf, const gchar *atp, const gchar **bp,
457 /* Find first non-separator char */
/* Separators skipped: comma, semicolon, space, LF, CR. strchr() also
 * matches the terminating NUL, so a NUL at *p does not trigger this break;
 * the loop bound (not shown) has to stop the scan. */
461 if( strchr( ",; \n\r", *p ) == NULL ) break;
466 /* Search forward for end of address */
/* Stops at a comma or semicolon; because strchr() matches the terminating
 * NUL as well, end of string also ends the scan. */
470 if( strchr( ",;", *p ) ) break;
477 * Extract E-Mail address from buffer. If found, address is removed from
479 * Enter: buffer Address buffer.
480 * Return: E-Mail address, or NULL if none found. Must g_free() when done.
/*
 * Cut an e-mail address out of `buffer`: find the at-sign with
 * addrharvest_find_at(), scan backwards for the start (bp) and forwards for
 * the end (ep), duplicate the address and then overwrite it in the buffer.
 * NOTE(review): the delimiter tests and the computation of `len` are on
 * lines missing from this listing.
 */
482 static gchar *addrharvest_extract_address( gchar *buffer ) {
484 gchar *atCh, *p, *bp, *ep;
488 atCh = addrharvest_find_at( buffer );
490 /* Search back for start of address */
493 while( p >= buffer ) {
503 /* Search fwd for end */
511 else if( *p == ' ' ) {
/* g_strndup() copies at most len + 1 bytes and NUL-terminates the copy. */
522 addr = g_strndup( bp, len + 1 );
/* NOTE(review): this reads len bytes starting at ep. If len is the address
 * length (ep - bp), ep + len can lie beyond the end of the caller's buffer
 * for a long address; confirm the bound. */
523 memmove( bp, ep, len );
532 * Parse address from header buffer creating address in cache.
533 * Enter: harvester Harvester object.
534 * entry Header object.
535 * cache Address cache to load.
536 * hdrBuf Pointer to header buffer.
/*
 * Walk a header value, and for every at-sign found copy the surrounding
 * address text into a local buffer, split it into e-mail address and
 * display name, clean up the name and pass both to
 * addrharvest_insert_cache().
 * NOTE(review): the statements that advance hdrBuf, guard `email` and free
 * `email`/`name` are on lines missing from this listing.
 */
538 static void addrharvest_parse_address(
539 AddressHarvester *harvester, HeaderEntry *entry,
540 AddressCache *cache, const gchar *hdrBuf )
542 gchar buffer[ ADDR_BUFFSIZE + 2 ];
545 gchar *atCh, *email, *name;
548 /* Search for an address */
549 while((atCh = addrharvest_find_at( hdrBuf )) != NULL) {
550 /* Find address string */
551 addrharvest_find_address( hdrBuf, atCh, &bp, &ep );
553 /* Copy into buffer */
554 bufLen = ( size_t ) ( ep - bp );
/* Clamp so that index bufLen + 2 below stays inside buffer, whose last
 * valid index is ADDR_BUFFSIZE + 1. */
555 if( bufLen > ADDR_BUFFSIZE -1 ) {
556 bufLen = ADDR_BUFFSIZE - 1;
/* strncpy() does not terminate when the source is longer than bufLen,
 * hence the explicit NUL bytes that follow. */
558 strncpy( buffer, bp, bufLen );
559 buffer[ bufLen ] = '\0';
560 buffer[ bufLen + 1 ] = '\0';
561 buffer[ bufLen + 2 ] = '\0';
563 /* Extract address from buffer */
564 email = addrharvest_extract_address( buffer );
566 /* Unescape characters */
567 mgu_str_unescape( buffer );
569 /* Remove noise characters */
570 addrharvest_del_email( buffer, REM_NAME_STRING );
571 addrharvest_del_email( buffer, REM_NAME_STRING2 );
573 /* Remove leading and trailing quotes and spaces */
574 mgu_str_ltc2space( buffer, '\"', '\"' );
575 mgu_str_ltc2space( buffer, '\'', '\'' );
576 mgu_str_ltc2space( buffer, '\"', '\"' );
577 mgu_str_ltc2space( buffer, '(', ')' );
578 g_strstrip( buffer );
/* Special case: the leftover name text equals the address itself. */
580 if( g_ascii_strcasecmp( buffer, email ) == 0 )
583 name = conv_unmime_header(buffer, NULL, TRUE);
585 /* Insert into address book */
586 #ifndef USE_ALT_ADDRBOOK
587 addrharvest_insert_cache(
588 harvester, entry, cache, name, email );
/* Alternate address book build: no AddressCache is passed. */
590 addrharvest_insert_cache(
591 harvester, entry, NULL, name, email);
601 * Test whether buffer contains a header that appears in header list.
602 * Enter: listHdr Header list.
604 * Return: TRUE if header in list.
/*
 * Decide whether the header line in `buf` names one of the headers in
 * listHdr. The text before the first colon is copied into `hdr` and
 * compared with each list element.
 * NOTE(review): the handling of a missing colon, the release of `hdr` and
 * the return statement are on lines missing from this listing.
 */
606 static gboolean addrharvest_check_hdr( GList *listHdr, gchar *buf ) {
609 gchar *p, *hdr, *nhdr;
613 p = strchr( buf, ':' );
615 len = ( size_t ) ( p - buf );
616 hdr = g_strndup( buf, len );
/* Compares strlen(nhdr) characters, ASCII case-insensitive: hdr matches
 * when it begins with the list header name. */
620 if (g_ascii_strncasecmp(nhdr, hdr, strlen(nhdr)) == 0 ) {
624 node = g_list_next( node );
632 * Read header into a linked list of lines.
633 * Enter: fp File to read.
634 * listHdr List of header lines of interest.
635 * done End of headers or end of file reached.
636 * Return: Linked list of lines.
/*
 * Read one logical header from `fp` as a list of lines. The first line is
 * checked against listHdr with addrharvest_check_hdr(); lines are appended
 * to the result list with g_slist_append().
 * NOTE(review): the loop structure, the updates of *done and the return
 * statement are on lines missing from this listing.
 */
638 static GSList *addrharvest_get_header( FILE *fp, GList *listHdr, gboolean *done ) {
640 gchar buf[ MSG_BUFFSIZE + 2 ];
/* fgets() stores at most MSG_BUFFSIZE - 1 characters plus the NUL. */
647 if( fgets( buf, MSG_BUFFSIZE, fp ) == NULL ) {
652 /* Test for end of headers */
/* A line starting with CR or LF is an empty line. */
653 if( buf[0] == '\r' || buf[0] == '\n' ) {
658 /* Test whether required header */
659 foundHdr = addrharvest_check_hdr( listHdr, buf );
661 /* Read all header lines. Only add reqd ones to list */
667 list = g_slist_append( list, p );
670 /* Read first character */
/* Leading space or tab marks a continuation of the current header. */
672 if( ch == ' ' || ch == '\t' ) {
673 /* Continuation character - read into buffer */
674 if( fgets( buf, MSG_BUFFSIZE, fp ) == NULL ) {
683 /* Push back character for next header */
694 * Read specified file into address book.
695 * Enter: harvester Harvester object.
696 * fileName File to read.
697 * cache Address cache to load.
/*
 * Harvest addresses from one message file: open it, pull headers with
 * addrharvest_get_header(), join each header into a single string, and for
 * headers whose entry is selected pass the value to
 * addrharvest_parse_address().
 * NOTE(review): the loop, the split at the colon, the buffer/file cleanup
 * and the return statement are on lines missing from this listing.
 */
700 static gint addrharvest_readfile(
701 AddressHarvester *harvester, const gchar *fileName,
702 AddressCache *cache, GList *listHdr )
706 gchar *buf, *addr, *p;
/* Binary mode, so line endings reach the parser unchanged. */
711 msgFile = g_fopen( fileName, "rb" );
713 /* Cannot open file */
714 retVal = MGU_OPEN_FILE;
720 list = addrharvest_get_header( msgFile, listHdr, &done );
/* Merge the header lines into one string, then drop the list. */
727 buf = mgu_list_coalesce( list );
728 mgu_free_list( list );
730 if(( p = strchr( buf, ':' ) ) != NULL ) {
/* addrharvest_find() matches on a header-name prefix of buf. */
734 entry = addrharvest_find( harvester, buf );
735 if( entry && entry->selected ) {
736 /* Sanitize control characters */
739 if( *p == '\r' || *p == '\n' || *p == '\t' )
743 addrharvest_parse_address(
744 harvester, entry, cache, addr );
755 * Read all files in specified directory into address book. Directories are
756 * traversed recursively if necessary.
757 * Enter: harvester Harvester object.
758 * cache Address cache to load.
759 * listHdr List of header names of interest.
760 * dir Directory to process.
/*
 * Harvest every message file in `dir`. Sub-directories are entered
 * recursively when harvester->folderRecurse is set; regular files are read
 * only when to_number() of the file name is non-negative.
 * NOTE(review): the final parameter, the error cleanup, the release of
 * `fullname` and the directory close are on lines missing from this listing.
 */
762 static void addrharvest_harvest_dir(
763 AddressHarvester *harvester, AddressCache *cache, GList *listHdr,
769 GError *error = NULL;
772 debug_print("Harvesting addresses from dir '%s'\n", dir);
774 if( ( dp = g_dir_open( dir, 0, &error ) ) == NULL ) {
775 debug_print("opening '%s' failed: %d (%s)\n", dir,
776 error->code, error->message);
781 /* Process directory */
782 while( (d = g_dir_read_name( dp )) != NULL ) {
/* g_strconcat() allocates; fullname must be freed once per iteration. */
783 fullname = g_strconcat(dir, G_DIR_SEPARATOR_S, d, NULL);
784 if( g_file_test(fullname, G_FILE_TEST_IS_DIR) ) {
785 if( harvester->folderRecurse ) {
/* NOTE(review): this is a substring test with DIR_IGNORE as the haystack,
 * so any name that occurs anywhere inside that string is treated as
 * ignored, not only the four names it lists. */
786 if( strstr( DIR_IGNORE, d ) != NULL ) {
791 addrharvest_harvest_dir(
792 harvester, cache, listHdr, (gchar *)fullname );
795 if( g_file_test(fullname, G_FILE_TEST_IS_REGULAR) ) {
796 if( ( num = to_number( d ) ) >= 0 ) {
797 addrharvest_readfile(
798 harvester, fullname, cache, listHdr );
807 * Read list of files in specified directory into address book.
808 * Enter: harvester Harvester object.
809 * cache Address cache to load.
810 * msgList List of message numbers, or NULL to process folder.
/*
 * Harvest the messages whose numbers are stored in a list: each number is
 * appended to harvester->path to form the file name handed to
 * addrharvest_readfile().
 * NOTE(review): the final parameter, the loop header and the release of
 * `fullname` are on lines missing from this listing.
 */
812 static void addrharvest_harvest_list(
813 AddressHarvester *harvester, AddressCache *cache, GList *listHdr,
/* NOTE(review): g_file_test() returns TRUE when ANY flag in the mask
 * passes, so EXISTS | IS_DIR also accepts an existing regular file.
 * G_FILE_TEST_IS_DIR alone would match the message printed below. */
820 if (!g_file_test(harvester->path, G_FILE_TEST_EXISTS | G_FILE_TEST_IS_DIR)) {
821 debug_print("'%s' doesn't exist or is not a dir\n", harvester->path);
825 /* Process message list */
/* The message number is carried in the list pointer itself. It is decoded
 * as unsigned but formatted with %d below. */
828 num = GPOINTER_TO_UINT( node->data );
829 fullname = g_strdup_printf("%s%c%d",
830 harvester->path, G_DIR_SEPARATOR, num);
831 addrharvest_readfile( harvester, fullname, cache, listHdr );
833 node = g_list_next( node );
838 * ============================================================================
839 * Read all files in specified directory into address book.
840 * Enter: harvester Harvester object.
841 * cache Address cache to load.
842 * msgList List of message numbers, or NULL to process folder.
844 * ============================================================================
/*
 * Public entry point. Validates arguments (MGU_BAD_ARGS is the value handed
 * to the failure checks), clears the cache, builds a list of lower-cased
 * names of the selected headers, then harvests either the whole folder
 * (msgList == NULL) or only the listed messages.
 * The cache argument is checked and used only without USE_ALT_ADDRBOOK.
 * NOTE(review): the final return value is set on lines missing from this
 * listing.
 */
846 gint addrharvest_harvest(
847 AddressHarvester *harvester, AddressCache *cache, GList *msgList )
853 retVal = MGU_BAD_ARGS;
854 cm_return_val_if_fail( harvester != NULL, retVal );
855 #ifndef USE_ALT_ADDRBOOK
856 cm_return_val_if_fail( cache != NULL, retVal );
858 cm_return_val_if_fail( harvester->path != NULL, retVal );
860 #ifndef USE_ALT_ADDRBOOK
/* Start from an empty cache, marked as not yet read. */
862 addrcache_clear( cache );
863 cache->dataRead = FALSE;
865 /* Build list of headers of interest */
867 node = harvester->headerTable;
872 if( entry->selected ) {
/* Each list element is a newly allocated lower-case copy. */
875 p = g_utf8_strdown( entry->header, -1 );
876 listHdr = g_list_append( listHdr, p );
878 node = g_list_next( node );
881 /* Process directory/files */
882 if( msgList == NULL ) {
883 addrharvest_harvest_dir( harvester, cache, listHdr, harvester->path );
886 addrharvest_harvest_list( harvester, cache, listHdr, msgList );
888 mgu_free_dlist( listHdr );
890 #ifndef USE_ALT_ADDRBOOK
/* Mark the cache as loaded and unmodified. */
892 cache->modified = FALSE;
893 cache->dataRead = TRUE;
899 * ============================================================================
900 * Test whether any headers have been selected for processing.
901 * Enter: harvester Harvester object.
902 * Return: TRUE if a header was selected, FALSE if none were selected.
903 * ============================================================================
/*
 * Report whether any header entry is selected: returns TRUE at the first
 * entry in harvester->headerTable whose `selected` flag is set.
 * NOTE(review): `retVal` (used for the NULL-harvester case and presumably
 * for the not-found result) is initialised on a line missing from this
 * listing.
 */
905 gboolean addrharvest_check_header( AddressHarvester *harvester ) {
910 cm_return_val_if_fail( harvester != NULL, retVal );
912 node = harvester->headerTable;
916 entry = ( HeaderEntry * ) node->data;
917 if( entry->selected ) return TRUE;
918 node = g_list_next( node );
924 * ============================================================================
926 * ============================================================================