2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 2002-2007 Match Grun and the Claws Mail team
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * Functions for an E-Mail address harvester.
31 #include "addrharvest.h"
35 /* Mail header names of interest */
/* One pointer per header name; each is given its own entry in the
 * harvester's header table by addrharvest_create(). The HEADER_* names are
 * defined outside this file section. */
36 static gchar *_headerFrom_ = HEADER_FROM;
37 static gchar *_headerReplyTo_ = HEADER_REPLY_TO;
38 static gchar *_headerSender_ = HEADER_SENDER;
39 static gchar *_headerErrorsTo_ = HEADER_ERRORS_TO;
40 static gchar *_headerCC_ = HEADER_CC;
41 static gchar *_headerTo_ = HEADER_TO;
/* ADDR_BUFFSIZE: working buffer for one address in addrharvest_parse_address().
 * MSG_BUFFSIZE: line buffer passed to fgets() in addrharvest_get_header().
 * MSGNUM_BUFFSIZE: buffer for a message number formatted as a file name in
 * addrharvest_harvest_list(). */
43 #define ADDR_BUFFSIZE 1024
44 #define MSG_BUFFSIZE 2048
45 #define MSGNUM_BUFFSIZE 32
/* Folder size assigned to a new harvester by addrharvest_create(). */
46 #define DFL_FOLDER_SIZE 20
48 /* Noise strings included by some other E-Mail clients */
49 #define REM_NAME_STRING "(Email)"
50 #define REM_NAME_STRING2 "(Email 2)"
52 /* Directories to ignore */
/* Tab-separated names; addrharvest_harvest_dir() searches this string with
 * strstr() before recursing into a sub-directory. */
53 #define DIR_IGNORE ".\t..\t.sylpheed_mark\t.sylpheed_claws_cache"
/* Element type of the harvester's headerTable list. */
58 typedef struct _HeaderEntry HeaderEntry;
67 * Build header table entry.
68 * Enter: harvester Harvester object.
/* Allocates a zero-filled HeaderEntry with g_new0(), marks it as not
 * selected and appends it to harvester->headerTable. */
71 static void addrharvest_build_entry(
72 AddressHarvester* harvester, gchar *name )
76 entry = g_new0( HeaderEntry, 1 );
78 entry->selected = FALSE;
/* g_list_append() returns the (possibly new) list head, so store it back */
81 harvester->headerTable = g_list_append( harvester->headerTable, entry );
/* Per-entry callback handed to g_hash_table_foreach_remove() by
 * addrharvest_free_table() when the duplicate table is emptied. */
87 static gint addrharvest_free_table_vis( gpointer key, gpointer value, gpointer data ) {
/* Releases the harvester's header list and its duplicate table, leaving
 * both harvester fields set to NULL. */
97 static void addrharvest_free_table( AddressHarvester* harvester ) {
101 /* Free header list */
102 node = harvester->headerTable;
104 entry = ( HeaderEntry * ) node->data;
/* Reset the entry's fields before moving on to the next node */
105 entry->header = NULL;
106 entry->selected = FALSE;
107 entry->folder = NULL;
110 node = g_list_next( node );
/* g_list_free() releases the list nodes themselves */
112 g_list_free( harvester->headerTable );
113 harvester->headerTable = NULL;
115 /* Free duplicate table */
/* Empty the table through the visitor first, then destroy the table object */
116 g_hash_table_foreach_remove( harvester->dupTable, addrharvest_free_table_vis, NULL );
117 g_hash_table_destroy( harvester->dupTable );
118 harvester->dupTable = NULL;
/* Creates a new harvester: a zero-filled AddressHarvester with no path, an
 * empty string-keyed duplicate table, the default folder size, a success
 * status, and one header table entry per header name declared at the top of
 * this file. */
125 AddressHarvester *addrharvest_create( void ) {
126 AddressHarvester *harvester;
128 harvester = g_new0( AddressHarvester, 1 );
129 harvester->path = NULL;
/* Keys are hashed and compared as C strings */
130 harvester->dupTable = g_hash_table_new( g_str_hash, g_str_equal );
131 harvester->folderSize = DFL_FOLDER_SIZE;
132 harvester->retVal = MGU_SUCCESS;
134 /* Build header table */
135 harvester->headerTable = NULL;
136 addrharvest_build_entry( harvester, _headerFrom_ );
137 addrharvest_build_entry( harvester, _headerReplyTo_ );
138 addrharvest_build_entry( harvester, _headerSender_ );
139 addrharvest_build_entry( harvester, _headerErrorsTo_ );
140 addrharvest_build_entry( harvester, _headerCC_ );
141 addrharvest_build_entry( harvester, _headerTo_ );
150 * Specify path to folder that will be harvested.
151 * Entry: harvester Harvester object.
152 * value Full directory path.
/* Stores the folder path on the harvester. Does nothing when harvester is
 * NULL. */
154 void addrharvest_set_path( AddressHarvester* harvester, const gchar *value ) {
155 g_return_if_fail( harvester != NULL );
/* mgu_replace_string() is handed the old path and its result becomes the new one */
156 harvester->path = mgu_replace_string( harvester->path, value );
/* Trim leading and trailing whitespace in place */
157 g_strstrip( harvester->path );
161 * Specify maximum folder size.
162 * Entry: harvester Harvester object.
/* Records the folder size on the harvester. Does nothing when harvester is
 * NULL.
 * NOTE(review): addrharvest_insert_cache() uses folderSize as a divisor and
 * modulus - confirm that zero can never be stored here. */
165 void addrharvest_set_folder_size(
166 AddressHarvester* harvester, const gint value )
168 g_return_if_fail( harvester != NULL );
170 harvester->folderSize = value;
175 * Specify folder recursion.
176 * Entry: harvester Harvester object.
177 * value TRUE to process sub-folders, FALSE to process folder only.
/* Stores the recursion flag on the harvester; addrharvest_harvest_dir()
 * consults it before descending into a sub-directory. Does nothing when
 * harvester is NULL. */
179 void addrharvest_set_recurse(
180 AddressHarvester* harvester, const gboolean value )
182 g_return_if_fail( harvester != NULL );
183 harvester->folderRecurse = value;
187 * Search (case insensitive) for header entry with specified name.
188 * Enter: harvester Harvester.
190 * Return: Header, or NULL if not found.
/* Walks harvester->headerTable looking for the entry whose header name
 * matches the start of name, ignoring ASCII case. */
192 static HeaderEntry *addrharvest_find(
193 AddressHarvester* harvester, const gchar *name ) {
198 node = harvester->headerTable;
/* Only strlen(entry->header) characters are compared, so name may carry
 * extra text after the header name and still match */
203 if (g_ascii_strncasecmp(entry->header, name,
204 strlen(entry->header)) == 0 ) {
208 node = g_list_next( node );
214 * Set selection for specified header.
215 * Enter: harvester Harvester.
217 * value Value to set.
/* Sets the selected flag of the header entry found for name. Does nothing
 * when harvester is NULL or when addrharvest_find() returns no entry. */
219 void addrharvest_set_header(
220 AddressHarvester* harvester, const gchar *name, const gboolean value )
224 g_return_if_fail( harvester != NULL );
225 entry = addrharvest_find( harvester, name );
226 if( entry != NULL ) {
227 entry->selected = value;
233 * Enter: harvester Harvester.
235 * Return: Address count, or -1 if header not found.
/* Looks up the header entry for name and, when one is found, takes its
 * count field as the result. */
237 gint addrharvest_get_count( AddressHarvester* harvester, const gchar *name ) {
/* A NULL harvester returns count as it stands at this point */
242 g_return_val_if_fail( harvester != NULL, count );
243 entry = addrharvest_find( harvester, name );
244 if( entry != NULL ) {
245 count = entry->count;
251 * Free up object by releasing internal memory.
252 * Enter: harvester Harvester.
/* Releases the harvester's internal data (header list, duplicate table and
 * path string) and resets its fields. Does nothing when harvester is NULL. */
254 void addrharvest_free( AddressHarvester *harvester ) {
255 g_return_if_fail( harvester != NULL );
257 /* Free internal stuff */
258 addrharvest_free_table( harvester );
259 g_free( harvester->path );
/* Clear the fields so no stale pointers or settings remain */
262 harvester->path = NULL;
263 harvester->retVal = MGU_SUCCESS;
264 harvester->headerTable = NULL;
266 harvester->folderSize = 0;
268 /* Now release object */
273 * Insert address into cache.
274 * Enter: harvester Harvester object.
275 * entry Header object.
276 * cache Address cache to load.
278 * address eMail address.
/* Files one harvested address under the header entry's current folder.
 * The duplicate table is consulted using a copy of the address as key; for
 * a person already known only the name may be replaced by a longer one.
 * Folders are named "<header> (<n>)" and a new one is started when the entry
 * has no folder yet or entry->count is a multiple of harvester->folderSize. */
280 static void addrharvest_insert_cache(
281 AddressHarvester *harvester, HeaderEntry *entry,
282 AddressCache *cache, const gchar *name,
283 const gchar *address )
293 folder = entry->folder;
294 if( folder == NULL ) {
295 newFolder = TRUE; /* No folder yet */
/* folderSize acts as the modulus here, so it must not be zero */
297 if( entry->count % harvester->folderSize == 0 ) {
298 newFolder = TRUE; /* Folder is full */
/* Private copy of the address used as the duplicate table key */
302 key = g_strdup( address );
304 person = g_hash_table_lookup( harvester->dupTable, key );
306 /* Update existing person to use longest name */
307 value = ADDRITEM_NAME(person);
308 if( strlen( name ) > strlen( value ) ) {
309 addritem_person_set_common_name( person, name );
314 /* Folder if required */
/* Folder numbers start at 1 */
316 cnt = 1 + ( entry->count / harvester->folderSize );
317 folderName =g_strdup_printf( "%s (%d)",
318 entry->header, cnt );
319 folder = addritem_create_item_folder();
320 addritem_folder_set_name( folder, folderName );
321 addritem_folder_set_remarks( folder, "" );
322 addrcache_id_folder( cache, folder );
323 addrcache_add_folder( cache, folder );
/* Remember the folder so later addresses for this header reuse it */
324 entry->folder = folder;
325 g_free( folderName );
329 person = addrcache_add_contact(
330 cache, folder, name, address, "" );
/* Register the new person so later occurrences of the address find it */
331 g_hash_table_insert( harvester->dupTable, key, person );
334 addritem_parse_first_last( person );
338 * Remove specified string from name.
340 * str String to remove.
/* Removes occurrences of str from name in place. Matches are located
 * case-insensitively with strcasestr(), and the text following each match
 * is moved down over it. */
342 static void addrharvest_del_email( gchar *name, gchar *str ) {
346 lenr = strlen( str );
347 while((p = strcasestr( name, str )) != NULL) {
/* memmove() because source and destination overlap */
349 memmove( p, p + lenr, lenn );
354 * Find position of at (@) character in buffer.
355 * Enter: buffer Start of buffer.
356 * Return: Position of at character, or NULL if not found.
357 * Note: This function searches for the last occurrence of an 'at' character
358 * prior to a valid delimiter character for the end of address. This enables
359 * an address to be found where it is also used as the name of the
360 * recipient. For example:
361 * "axle.rose@netscape.com" <axle.rose@netscape.com>
362 * The last occurrence of the at character is detected.
/* Locates the '@' that belongs to the next address in buffer. The search
 * starts from the first '@' found by strchr(). */
364 static gchar *addrharvest_find_at( const gchar *buffer ) {
368 atCh = strchr( buffer, '@' );
370 /* Search forward for another one */
393 * Find start and end of address string.
394 * Enter: buf Start address of buffer to process (not modified).
395 * atp Pointer to email at (@) character.
396 * bp Pointer to start of email address (returned).
397 * ep Pointer to end of email address (returned).
/* Determines where the address text surrounding atp begins and ends.
 * Comma, semicolon, space, LF and CR count as separators when looking for
 * the start; comma or semicolon ends the address. */
399 static void addrharvest_find_address(
400 const gchar *buf, const gchar *atp, const gchar **bp,
405 /* Find first non-separator char */
409 if( strchr( ",; \n\r", *p ) == NULL ) break;
414 /* Search forward for end of address */
/* strchr() also matches the terminating NUL, so end of string stops the scan too */
418 if( strchr( ",;", *p ) ) break;
425 * Extract E-Mail address from buffer. If found, address is removed from
427 * Enter: buffer Address buffer.
428 * Return: E-Mail address, or NULL if none found. Must g_free() when done.
/* Pulls the e-mail address out of buffer. The '@' is located with
 * addrharvest_find_at(), the address limits are found by scanning backwards
 * (no further than the start of buffer) and forwards from there, and the
 * address is returned as a newly allocated copy. */
430 static gchar *addrharvest_extract_address( gchar *buffer ) {
432 gchar *atCh, *p, *bp, *ep;
436 atCh = addrharvest_find_at( buffer );
438 /* Search back for start of address */
441 while( p >= buffer ) {
451 /* Search fwd for end */
459 else if( *p == ' ' ) {
/* Copy made with g_strndup(), so the caller owns it and must g_free() it */
470 addr = g_strndup( bp, len + 1 );
/* Move the text at ep down to bp, overwriting the address inside buffer */
471 memmove( bp, ep, len );
480 * Parse address from header buffer creating address in cache.
481 * Enter: harvester Harvester object.
482 * entry Header object.
483 * cache Address cache to load.
484 * hdrBuf Pointer to header buffer.
/*
 * Splits a header value into individual addresses and files each of them.
 * Enter: harvester Harvester object.
 *        entry     Header table entry the addresses are filed under.
 *        cache     Address cache to load.
 *        hdrBuf    Header text to scan.
 * For every '@' found by addrharvest_find_at() the surrounding address text
 * is copied into a local buffer, the e-mail address is extracted from it and
 * what remains is cleaned up (unescaping, noise strings, quotes, brackets,
 * surrounding spaces) to serve as the display name. Name and address are then
 * passed to addrharvest_insert_cache().
 */
486 static void addrharvest_parse_address(
487 AddressHarvester *harvester, HeaderEntry *entry,
488 AddressCache *cache, const gchar *hdrBuf )
490 gchar buffer[ ADDR_BUFFSIZE + 2 ];
493 gchar *atCh, *email, *name;
496 /* Search for an address */
497 while((atCh = addrharvest_find_at( hdrBuf )) != NULL) {
498 /* Find address string */
499 addrharvest_find_address( hdrBuf, atCh, &bp, &ep );
501 /* Copy into buffer */
/* Three terminators are written at bufLen .. bufLen + 2 and buffer holds
 * ADDR_BUFFSIZE + 2 bytes, so the copy must be limited to ADDR_BUFFSIZE - 1
 * characters for the last terminator to stay inside the array */
502 bufLen = ( size_t ) ( ep - bp );
503 if( bufLen > ADDR_BUFFSIZE - 1 ) {
504 bufLen = ADDR_BUFFSIZE - 1;
506 strncpy( buffer, bp, bufLen );
/* strncpy() does not terminate a truncated copy; terminate explicitly */
507 buffer[ bufLen ] = '\0';
508 buffer[ bufLen + 1 ] = '\0';
509 buffer[ bufLen + 2 ] = '\0';
511 /* Extract address from buffer */
512 email = addrharvest_extract_address( buffer );
514 /* Unescape characters */
515 mgu_str_unescape( buffer );
517 /* Remove noise characters */
518 addrharvest_del_email( buffer, REM_NAME_STRING );
519 addrharvest_del_email( buffer, REM_NAME_STRING2 );
521 /* Remove leading trailing quotes and spaces */
522 mgu_str_ltc2space( buffer, '\"', '\"' );
523 mgu_str_ltc2space( buffer, '\'', '\'' );
524 mgu_str_ltc2space( buffer, '\"', '\"' );
525 mgu_str_ltc2space( buffer, '(', ')' );
526 g_strstrip( buffer );
/* Detect a name that merely repeats the e-mail address (ASCII case ignored) */
528 if( g_ascii_strcasecmp( buffer, email ) == 0 ) {
/* Decode MIME encoded words in the remaining name text */
533 name = conv_unmime_header(buffer, NULL);
536 /* Insert into address book */
537 addrharvest_insert_cache(
538 harvester, entry, cache, name, email );
547 * Test whether buffer contains a header that appears in header list.
548 * Enter: listHdr Header list.
550 * Return: TRUE if header in list.
/* Checks a header line against the list of header names. The text before
 * the first ':' in buf is taken as the line's header name and compared with
 * each list element, ignoring ASCII case. */
552 static gboolean addrharvest_check_hdr( GList *listHdr, gchar *buf ) {
555 gchar *p, *hdr, *nhdr;
559 p = strchr( buf, ':' );
/* Length of the header name, i.e. everything before the colon */
561 len = ( size_t ) ( p - buf );
562 hdr = g_strndup( buf, len );
/* Only strlen(nhdr) characters are compared, so hdr matches when it begins
 * with the list element */
566 if (g_ascii_strncasecmp(nhdr, hdr, strlen(nhdr)) == 0 ) {
570 node = g_list_next( node );
578 * Read header into a linked list of lines.
579 * Enter: fp File to read.
580 * listHdr List of header lines of interest.
581 * done End of headers or end of file reached.
582 * Return: Linked list of lines.
/* Reads one header, including any continuation lines, from fp. Lines are
 * read with fgets(); a line starting with CR or LF marks the end of the
 * headers. The first line is tested against listHdr with
 * addrharvest_check_hdr(), and lines are collected in a GSList. */
584 static GSList *addrharvest_get_header( FILE *fp, GList *listHdr, gboolean *done ) {
586 gchar buf[ MSG_BUFFSIZE + 2 ];
/* fgets() returns NULL at end of file or on a read error */
593 if( fgets( buf, MSG_BUFFSIZE, fp ) == NULL ) {
598 /* Test for end of headers */
599 if( buf[0] == '\r' || buf[0] == '\n' ) {
604 /* Test whether required header */
605 foundHdr = addrharvest_check_hdr( listHdr, buf );
607 /* Read all header lines. Only add reqd ones to list */
613 list = g_slist_append( list, p );
616 /* Read first character */
/* A leading space or tab marks the next line as a continuation of this header */
618 if( ch == ' ' || ch == '\t' ) {
619 /* Continuation character - read into buffer */
620 if( fgets( buf, MSG_BUFFSIZE, fp ) == NULL ) {
629 /* Push back character for next header */
640 * Read specified file into address book.
641 * Enter: harvester Harvester object.
642 * fileName File to read.
643 * cache Address cache to load.
/* Harvests addresses from the headers of one message file. The file is
 * opened in binary mode; headers are fetched with addrharvest_get_header()
 * and joined into one string. When the header has a harvester entry that is
 * selected, its value is handed to addrharvest_parse_address(). */
646 static gint addrharvest_readfile(
647 AddressHarvester *harvester, const gchar *fileName,
648 AddressCache *cache, GList *listHdr )
652 gchar *buf, *addr, *p;
657 msgFile = g_fopen( fileName, "rb" );
659 /* Cannot open file */
660 retVal = MGU_OPEN_FILE;
666 list = addrharvest_get_header( msgFile, listHdr, &done );
/* Join the collected lines into one string, then release the line list */
673 buf = mgu_list_coalesce( list );
674 mgu_free_list( list );
/* A header without a colon has no value to parse */
676 if(( p = strchr( buf, ':' ) ) != NULL ) {
680 entry = addrharvest_find( harvester, buf );
681 if( entry && entry->selected ) {
682 /* Sanitize control characters */
685 if( *p == '\r' || *p == '\n' || *p == '\t' )
689 addrharvest_parse_address(
690 harvester, entry, cache, addr );
701 * Read all files in specified directory into address book. Directories are
702 * traversed recursively if necessary.
703 * Enter: harvester Harvester object.
704 * cache Address cache to load.
705 * msgList List of message numbers, or NULL to process folder.
706 * dir Directory to process.
/*
 * Harvests every message file found in a directory, descending into
 * sub-directories when harvester->folderRecurse is set.
 * Enter: harvester Harvester object.
 *        cache     Address cache to load.
 *        listHdr   List of header names of interest.
 *        dir       Directory to process.
 * Directory entries are examined by their bare name, i.e. relative to the
 * current working directory. Regular files are read only when to_number()
 * returns a non-negative value for their name.
 */
708 static void addrharvest_harvest_dir(
709 AddressHarvester *harvester, AddressCache *cache, GList *listHdr,
717 if( ( dp = opendir( dir ) ) == NULL ) {
721 /* Process directory */
723 while( ( d = readdir( dp ) ) != NULL ) {
/* Skip entries that cannot be examined; without this check s would keep
 * stale or indeterminate contents after a failed stat() */
724 if( stat( d->d_name, &s ) != 0 ) continue;
725 if( S_ISDIR( s.st_mode ) ) {
726 if( harvester->folderRecurse ) {
/* strstr() searches DIR_IGNORE for the name as a substring, so any name
 * contained in that string is skipped, not only the listed ones */
727 if( strstr( DIR_IGNORE, d->d_name ) != NULL )
729 addrharvest_harvest_dir(
730 harvester, cache, listHdr, d->d_name );
733 if( S_ISREG( s.st_mode ) ) {
734 if( ( num = to_number( d->d_name ) ) >= 0 ) {
735 addrharvest_readfile(
736 harvester, d->d_name, cache, listHdr );
745 * Read list of files in specified directory into address book.
746 * Enter: harvester Harvester object.
747 * cache Address cache to load.
748 * msgList List of message numbers, or NULL to process folder.
/* Harvests the messages named in a list of message numbers. Each list
 * element carries a number that is formatted as a decimal file name and
 * read with addrharvest_readfile(). */
750 static void addrharvest_harvest_list(
751 AddressHarvester *harvester, AddressCache *cache, GList *listHdr,
757 gchar msgNum[ MSGNUM_BUFFSIZE ];
759 if( ( dp = opendir( harvester->path ) ) == NULL ) {
763 /* Process message list */
/* The file names built below are relative, so make the folder the current
 * directory. NOTE(review): the result of chdir() is not checked. */
764 chdir( harvester->path );
/* The message number is stored directly in the list node's data pointer */
767 num = GPOINTER_TO_UINT( node->data );
768 sprintf( msgNum, "%d", num );
769 addrharvest_readfile( harvester, msgNum, cache, listHdr );
770 node = g_list_next( node );
776 * ============================================================================
777 * Read all files in specified directory into address book.
778 * Enter: harvester Harvester object.
779 * cache Address cache to load.
780 * msgList List of message numbers, or NULL to process folder.
782 * ============================================================================
/* Entry point of the harvest. The cache is cleared, a list holding copies
 * of the selected header names is built, and then either the whole folder
 * (msgList == NULL) or only the listed messages are processed. Afterwards
 * the cache is flagged as read and unmodified. */
784 gint addrharvest_harvest(
785 AddressHarvester *harvester, AddressCache *cache, GList *msgList )
/* Value handed back by the argument checks below */
791 retVal = MGU_BAD_ARGS;
792 g_return_val_if_fail( harvester != NULL, retVal );
793 g_return_val_if_fail( cache != NULL, retVal );
794 g_return_val_if_fail( harvester->path != NULL, retVal );
797 addrcache_clear( cache );
798 cache->dataRead = FALSE;
800 /* Build list of headers of interest */
802 node = harvester->headerTable;
807 if( entry->selected ) {
/* The list holds its own copies of the names; released by mgu_free_dlist() below */
810 p = g_strdup( entry->header );
812 listHdr = g_list_append( listHdr, p );
814 node = g_list_next( node );
817 /* Process directory/files */
818 if( msgList == NULL ) {
819 addrharvest_harvest_dir( harvester, cache, listHdr, harvester->path );
822 addrharvest_harvest_list( harvester, cache, listHdr, msgList );
824 mgu_free_dlist( listHdr );
827 cache->modified = FALSE;
828 cache->dataRead = TRUE;
834 * ============================================================================
835 * Test whether any headers have been selected for processing.
836 * Enter: harvester Harvester object.
837 * Return: TRUE if a header was selected, FALSE if none were selected.
838 * ============================================================================
/* Reports whether at least one entry in the header table is selected. */
840 gboolean addrharvest_check_header( AddressHarvester *harvester ) {
845 g_return_val_if_fail( harvester != NULL, retVal );
847 node = harvester->headerTable;
851 entry = ( HeaderEntry * ) node->data;
/* The first selected entry settles the answer */
852 if( entry->selected ) return TRUE;
853 node = g_list_next( node );
859 * ============================================================================
861 * ============================================================================