2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 2002 Match Grun
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 * Functions for an E-Mail address harvester.
31 #include "addrharvest.h"
34 /* Mail header names of interest */
/* File-local pointers to the HEADER_* name macros. addrharvest_create()
 * registers one header table entry for each of them. */
35 static gchar *_headerFrom_ = HEADER_FROM;
36 static gchar *_headerReplyTo_ = HEADER_REPLY_TO;
37 static gchar *_headerSender_ = HEADER_SENDER;
38 static gchar *_headerErrorsTo_ = HEADER_ERRORS_TO;
39 static gchar *_headerCC_ = HEADER_CC;
40 static gchar *_headerTo_ = HEADER_TO;
/* Size basis of the working buffer used while parsing one address */
42 #define ADDR_BUFFSIZE 1024
/* Size basis of the line buffer used while reading message headers */
43 #define MSG_BUFFSIZE 2048
/* Size of the buffer that receives a formatted message number */
44 #define MSGNUM_BUFFSIZE 32
/* Initial value given to the harvester's folderSize field */
45 #define DFL_FOLDER_SIZE 20
47 /* Noise strings included by some other E-Mail clients */
48 #define REM_NAME_STRING "(E-mail)"
49 #define REM_NAME_STRING2 "(E-mail 2)"
51 /* Directories to ignore */
/* Tab-separated list; directory names are tested against it with strstr() */
52 #define DIR_IGNORE ".\t..\t.sylpheed_mark\t.sylpheed_cache"
/* Per-header record. Fields referenced in this file: header, selected,
 * folder and count (the struct body is not visible in this view). */
57 typedef struct _HeaderEntry HeaderEntry;
66 * Build header table entry.
67 * Enter: harvester Harvester object.
/*
 * Allocate a zero-filled HeaderEntry, mark it as not selected and append it
 * to harvester->headerTable.
 * NOTE(review): the function is declared to return void * but no return
 * statement is visible in this view, and the calls in addrharvest_create()
 * ignore the result - confirm whether the return type should be plain void.
 */
70 static void *addrharvest_build_entry(
71 AddressHarvester* harvester, gchar *name )
75 entry = g_new0( HeaderEntry, 1 );
77 entry->selected = FALSE;
80 harvester->headerTable = g_list_append( harvester->headerTable, entry );
/*
 * Debug helper: write the header name and selection flag of one header
 * entry to the given stream.
 * Enter: entry  Header entry to display.
 *        stream Output stream.
 */
83 static void addrharvest_print_hdrentry( HeaderEntry *entry, FILE *stream ) {
84 fprintf( stream, "Header Entry\n" );
85 fprintf( stream, " name : %s\n", entry->header );
86 fprintf( stream, "selected : %s\n", entry->selected ? "yes" : "no" );
/* Visitor callback passed to g_hash_table_foreach_remove() when the
 * duplicate table is emptied. Its body is not visible in this view. */
92 static gint addrharvest_free_table_vis( gpointer key, gpointer value, gpointer data ) {
/*
 * Release the header list and the duplicate-address hash table held by the
 * harvester, leaving both fields set to NULL.
 * Enter: harvester Harvester object.
 */
102 static void addrharvest_free_table( AddressHarvester* harvester ) {
106 /* Free header list */
107 node = harvester->headerTable;
109 entry = ( HeaderEntry * ) node->data;
/* Clear the entry's fields before the list itself is released */
110 entry->header = NULL;
111 entry->selected = FALSE;
112 entry->folder = NULL;
115 node = g_list_next( node );
117 g_list_free( harvester->headerTable );
118 harvester->headerTable = NULL;
120 /* Free duplicate table */
/* NOTE(review): g_hash_table_freeze()/thaw() are deprecated in GLib 2.x -
 * confirm the GLib version this file targets. */
121 g_hash_table_freeze( harvester->dupTable );
/* Every key/value pair is visited by addrharvest_free_table_vis */
122 g_hash_table_foreach_remove( harvester->dupTable, addrharvest_free_table_vis, NULL );
123 g_hash_table_thaw( harvester->dupTable );
124 g_hash_table_destroy( harvester->dupTable );
125 harvester->dupTable = NULL;
/*
 * Create a harvester object with default settings: no path, an empty
 * string-keyed duplicate table, folder size DFL_FOLDER_SIZE, a success
 * return value, and one header table entry per known header name.
 * Entries are created unselected by addrharvest_build_entry().
 */
132 AddressHarvester *addrharvest_create( void ) {
133 AddressHarvester *harvester;
135 harvester = g_new0( AddressHarvester, 1 );
136 harvester->path = NULL;
/* Keys are compared as strings (g_str_hash / g_str_equal) */
137 harvester->dupTable = g_hash_table_new( g_str_hash, g_str_equal );
138 harvester->folderSize = DFL_FOLDER_SIZE;
139 harvester->retVal = MGU_SUCCESS;
141 /* Build header table */
142 harvester->headerTable = NULL;
143 addrharvest_build_entry( harvester, _headerFrom_ );
144 addrharvest_build_entry( harvester, _headerReplyTo_ );
145 addrharvest_build_entry( harvester, _headerSender_ );
146 addrharvest_build_entry( harvester, _headerErrorsTo_ );
147 addrharvest_build_entry( harvester, _headerCC_ );
148 addrharvest_build_entry( harvester, _headerTo_ );
157 * Specify path to folder that will be harvested.
158 * Entry: harvester Harvester object.
159 * value Full directory path.
/*
 * Set the folder path to harvest and strip leading and trailing whitespace
 * from the stored value. Does nothing when harvester is NULL.
 */
161 void addrharvest_set_path( AddressHarvester* harvester, const gchar *value ) {
162 g_return_if_fail( harvester != NULL );
/* presumably mgu_replace_string() releases the old string and stores a copy
 * of value - verify against mgutils */
163 harvester->path = mgu_replace_string( harvester->path, value );
164 g_strstrip( harvester->path );
168 * Specify maximum folder size.
169 * Entry: harvester Harvester object.
/*
 * Set the folder size used when harvested addresses are grouped into
 * folders. Does nothing when harvester is NULL.
 * NOTE(review): folderSize is later used as a divisor in
 * addrharvest_insert_cache(). A range check on value may sit on a line that
 * is not visible here - confirm that zero cannot be stored.
 */
172 void addrharvest_set_folder_size(
173 AddressHarvester* harvester, const gint value )
175 g_return_if_fail( harvester != NULL );
177 harvester->folderSize = value;
182 * Specify folder recursion.
183 * Entry: harvester Harvester object.
184 * value TRUE to process sub-folders, FALSE to process folder only.
/*
 * Store the sub-folder recursion flag. It is consulted by
 * addrharvest_harvest_dir() when a directory entry is itself a directory.
 * Does nothing when harvester is NULL.
 */
186 void addrharvest_set_recurse(
187 AddressHarvester* harvester, const gboolean value )
189 g_return_if_fail( harvester != NULL );
190 harvester->folderRecurse = value;
194 * Search (case insensitive) for header entry with specified name.
195 * Enter: harvester Harvester.
197 * Return: Header, or NULL if not found.
/*
 * Walk the header table looking for an entry whose header name equals
 * name, ignoring case (g_strcasecmp).
 */
199 static HeaderEntry *addrharvest_find(
200 AddressHarvester* harvester, const gchar *name ) {
205 node = harvester->headerTable;
210 if( g_strcasecmp( entry->header, name ) == 0 ) {
214 node = g_list_next( node );
220 * Set selection for specified header.
221 * Enter: harvester Harvester.
223 * value Value to set.
/*
 * Mark the named header as selected or unselected for harvesting. The
 * entry is located with addrharvest_find() and is only updated when one is
 * found. Does nothing when harvester is NULL.
 */
225 void addrharvest_set_header(
226 AddressHarvester* harvester, const gchar *name, const gboolean value )
230 g_return_if_fail( harvester != NULL );
231 entry = addrharvest_find( harvester, name );
232 if( entry != NULL ) {
233 entry->selected = value;
239 * Enter: harvester Harvester.
241 * Return: Address count, or -1 if header not found.
/*
 * Look up the named header and read back its address counter.
 * The initial value of count (returned when harvester is NULL or no entry
 * matches) is assigned on a line that is not visible in this view.
 */
243 gint addrharvest_get_count( AddressHarvester* harvester, const gchar *name ) {
248 g_return_val_if_fail( harvester != NULL, count );
249 entry = addrharvest_find( harvester, name );
250 if( entry != NULL ) {
251 count = entry->count;
257 * Free up object by releasing internal memory.
258 * Enter: harvester Harvester.
/*
 * Release the harvester's internal tables and path string, then reset its
 * scalar fields. Does nothing when harvester is NULL.
 */
260 void addrharvest_free( AddressHarvester *harvester ) {
261 g_return_if_fail( harvester != NULL );
263 /* Free internal stuff */
264 addrharvest_free_table( harvester );
265 g_free( harvester->path );
/* Reset fields so no stale pointers remain in the object */
268 harvester->path = NULL;
269 harvester->retVal = MGU_SUCCESS;
270 harvester->headerTable = NULL;
272 harvester->folderSize = 0;
274 /* Now release object */
279 * Display object to specified stream.
280 * Enter: harvester Harvester.
281 * stream Output stream.
/*
 * Debug helper: write the harvester's path, folder size, per-header
 * selection state and counters, and return value to stream.
 * Does nothing when harvester is NULL.
 */
283 void addrharvest_print( AddressHarvester *harvester, FILE *stream ) {
287 g_return_if_fail( harvester != NULL );
288 fprintf( stream, "Address Harvester:\n" );
289 fprintf( stream, " file path: '%s'\n", harvester->path );
/* NOTE(review): the format below ends with an unmatched apostrophe after
 * the number - looks like a typo in the output text; confirm. */
290 fprintf( stream, "max folder: %d'\n", harvester->folderSize );
292 node = harvester->headerTable;
/* One output line per header: name, selected yes/no, address count */
295 fprintf( stream, " header: %s", entry->header );
296 fprintf( stream, "\t: %s", entry->selected ? "yes" : "no" );
297 fprintf( stream, "\t: %d\n", entry->count );
298 node = g_list_next( node );
300 fprintf( stream, " ret val: %d\n", harvester->retVal );
304 * Insert address into cache.
305 * Enter: harvester Harvester object.
306 * entry Header object.
307 * cache Address cache to load.
309 * address eMail address.
/*
 * Record one harvested address. An address already present in the
 * duplicate table only has its display name updated (the longer name
 * wins). A new address is added as a contact to the header's current
 * folder, and a new numbered folder is started when required.
 */
311 static void addrharvest_insert_cache(
312 AddressHarvester *harvester, HeaderEntry *entry,
313 AddressCache *cache, const gchar *name,
314 const gchar *address )
324 folder = entry->folder;
325 if( folder == NULL ) {
326 newFolder = TRUE; /* No folder yet */
/* NOTE(review): folderSize is a divisor here and in the folder number
 * calculation further down; confirm it can never be zero. */
328 if( entry->count % harvester->folderSize == 0 ) {
329 newFolder = TRUE; /* Folder is full */
/* The duplicate table is keyed by a private copy of the address */
333 key = g_strdup( address );
335 person = g_hash_table_lookup( harvester->dupTable, key );
337 /* Update existing person to use longest name */
338 value = ADDRITEM_NAME(person);
339 if( strlen( name ) > strlen( value ) ) {
340 addritem_person_set_common_name( person, name );
345 /* Folder if required */
/* Folder names take the form "<header> (<n>)", with n counted from 1 */
347 cnt = 1 + ( entry->count / harvester->folderSize );
348 folderName =g_strdup_printf( "%s (%d)",
349 entry->header, cnt );
350 folder = addritem_create_item_folder();
351 addritem_folder_set_name( folder, folderName );
352 addritem_folder_set_remarks( folder, "" );
353 addrcache_id_folder( cache, folder );
354 addrcache_add_folder( cache, folder );
355 entry->folder = folder;
356 g_free( folderName );
/* New contact: add it to the cache and remember it in the duplicate table */
360 person = addrcache_add_contact(
361 cache, folder, name, address, "" );
362 g_hash_table_insert( harvester->dupTable, key, person );
365 addritem_parse_first_last( person );
369 * Remove specified string from name.
371 * str String to remove.
/*
 * Remove every case-insensitive occurrence of str from name, in place, by
 * moving the remainder of the string down over each match.
 * NOTE(review): strcasestr() is a non-standard (GNU/BSD) extension -
 * confirm it is available on all target platforms.
 */
373 static void addrharvest_del_email( gchar *name, gchar *str ) {
377 lenr = strlen( str );
/* The assignment inside the condition is intentional: loop while a match exists */
378 while( p = strcasestr( name, str ) ) {
/* lenn is computed on a line not visible in this view */
380 memmove( p, p + lenr, lenn );
385 * Find position of at (@) character in buffer.
386 * Enter: buffer Start of buffer.
387 * Return: Position of at character, or NULL if not found.
388 * Note: This function searches for the last occurrence of an 'at' character
389 * prior to a valid delimiter character for the end of address. This enables
390 * an address to be found where it is also used as the name of the
391 * recipient. For example:
392 * "axle.rose@netscape.com" <axle.rose@netscape.com>
393 * The last occurrence of the at character is detected.
/* Locate the '@' that belongs to the next address in buffer. Starts from
 * the first '@' found by strchr(); the forward scan that follows is not
 * visible in this view. */
395 static gchar *addrharvest_find_at( const gchar *buffer ) {
399 atCh = strchr( buffer, '@' );
401 /* Search forward for another one */
424 * Find start and end of address string.
425 * Enter: buf Start address of buffer to process (not modified).
426 * atp Pointer to email at (@) character.
427 * bp Pointer to start of email address (returned).
428 * ep Pointer to end of email address (returned).
/* Determine the start and end of the address text surrounding atp. Only
 * the two character-class tests below are visible in this view. */
430 static void addrharvest_find_address(
431 const gchar *buf, const gchar *atp, const gchar **bp,
436 /* Find first non-separator char */
/* Separators are comma, semicolon, space, newline and carriage return */
440 if( strchr( ",; \n\r", *p ) == NULL ) break;
445 /* Search forward for end of address */
/* A comma or semicolon ends the address */
449 if( strchr( ",;", *p ) ) break;
456 * Extract E-Mail address from buffer. If found, address is removed from
458 * Enter: buffer Address buffer.
459 * Return: E-Mail address, or NULL if none found. Must g_free() when done.
/*
 * Pull the e-mail address out of buffer: locate the '@', scan backwards
 * for the start and forwards for the end, copy the address into a newly
 * allocated string and close the gap left in buffer.
 */
461 static gchar *addrharvest_extract_address( gchar *buffer ) {
463 gchar *atCh, *p, *bp, *ep;
467 atCh = addrharvest_find_at( buffer );
469 /* Search back for start of address */
472 while( p >= buffer ) {
482 /* Search fwd for end */
490 else if( *p == ' ' ) {
/* The copy is a new allocation made by g_strndup() */
501 addr = g_strndup( bp, len + 1 );
/* NOTE(review): this moves len bytes starting at ep, which may extend
 * beyond the string's terminating NUL. The computation of len is not
 * visible here - confirm the read stays inside the caller's buffer. */
502 memmove( bp, ep, len );
511 * Parse address from header buffer creating address in cache.
512 * Enter: harvester Harvester object.
513 * entry Header object.
514 * cache Address cache to load.
515 * hdrBuf Pointer to header buffer.
/*
 * Split a header value into individual addresses. For each '@' found, the
 * surrounding address text is copied into a local buffer, the e-mail
 * address is extracted from it, the remaining text is cleaned up to serve
 * as the display name, and the pair is passed to addrharvest_insert_cache().
 */
517 static void addrharvest_parse_address(
518 AddressHarvester *harvester, HeaderEntry *entry,
519 AddressCache *cache, const gchar *hdrBuf )
521 gchar buffer[ ADDR_BUFFSIZE + 2 ];
524 gchar *atCh, *email, *name;
527 /* Search for an address */
528 while( atCh = addrharvest_find_at( hdrBuf ) ) {
529 /* Find address string */
530 addrharvest_find_address( hdrBuf, atCh, &bp, &ep );
532 /* Copy into buffer */
533 bufLen = ( size_t ) ( ep - bp );
/* Clamp so that the copy below cannot overrun buffer */
534 if( bufLen > ADDR_BUFFSIZE ) {
535 bufLen = ADDR_BUFFSIZE;
537 strncpy( buffer, bp, bufLen );
538 buffer[ bufLen ] = '\0';
539 buffer[ bufLen + 1 ] = '\0';
/* NOTE(review): buffer has ADDR_BUFFSIZE + 2 elements, so when bufLen has
 * been clamped to ADDR_BUFFSIZE the write below targets index
 * ADDR_BUFFSIZE + 2, one element past the end of the array. */
540 buffer[ bufLen + 2 ] = '\0';
542 /* Extract address from buffer */
543 email = addrharvest_extract_address( buffer );
545 /* Unescape characters */
546 mgu_str_unescape( buffer );
548 /* Remove noise characters */
549 addrharvest_del_email( buffer, REM_NAME_STRING );
550 addrharvest_del_email( buffer, REM_NAME_STRING2 );
552 /* Remove leading trailing quotes and spaces */
553 mgu_str_ltc2space( buffer, '\"', '\"' );
554 mgu_str_ltc2space( buffer, '\'', '\'' );
555 mgu_str_ltc2space( buffer, '\"', '\"' );
556 mgu_str_ltc2space( buffer, '(', ')' );
557 g_strstrip( buffer );
/* Case-insensitive test for a leftover name identical to the address */
559 if( g_strcasecmp( buffer, email ) == 0 ) {
566 /* Insert into address book */
567 addrharvest_insert_cache(
568 harvester, entry, cache, name, email );
576 * Test whether buffer contains a header that appears in header list.
577 * Enter: listHdr Header list.
579 * Return: TRUE if header in list.
/*
 * Test whether the header line in buf names one of the headers in listHdr.
 * The text before the first ':' is compared, ignoring case, against each
 * list item.
 */
581 static gboolean addrharvest_check_hdr( GList *listHdr, gchar *buf ) {
584 gchar *p, *hdr, *nhdr;
588 p = strchr( buf, ':' );
/* hdr is a temporary copy of the header name (text before the colon) */
590 len = ( size_t ) ( p - buf );
591 hdr = g_strndup( buf, len );
595 if( g_strcasecmp( nhdr, hdr ) == 0 ) {
599 node = g_list_next( node );
607 * Read header into a linked list of lines.
608 * Enter: fp File to read.
609 * listHdr List of header lines of interest.
610 * done End of headers or end of file reached.
611 * Return: Linked list of lines.
/*
 * Read one header, including any continuation lines, from fp and return
 * its lines as a list. Lines are checked against listHdr with
 * addrharvest_check_hdr().
 */
613 static GSList *addrharvest_get_header( FILE *fp, GList *listHdr, gboolean *done ) {
615 gchar buf[ MSG_BUFFSIZE + 2 ];
/* fgets() stores at most MSG_BUFFSIZE - 1 characters plus the NUL */
622 if( fgets( buf, MSG_BUFFSIZE, fp ) == NULL ) {
627 /* Test for end of headers */
628 if( buf[0] == '\r' || buf[0] == '\n' ) {
633 /* Test whether required header */
634 foundHdr = addrharvest_check_hdr( listHdr, buf );
636 /* Read all header lines. Only add reqd ones to list */
642 list = g_slist_append( list, p );
645 /* Read first character */
/* A leading space or tab marks a continuation of the current header */
647 if( ch == ' ' || ch == '\t' ) {
648 /* Continuation character - read into buffer */
649 if( fgets( buf, MSG_BUFFSIZE, fp ) == NULL ) {
658 /* Push back character for next header */
669 * Read specified file into address book.
670 * Enter: harvester Harvester object.
671 * fileName File to read.
672 * cache Address cache to load.
/*
 * Open one message file, read its headers and, for each header that is
 * both known and selected, pass the header value to
 * addrharvest_parse_address().
 */
675 static gint addrharvest_readfile(
676 AddressHarvester *harvester, const gchar *fileName,
677 AddressCache *cache, GList *listHdr )
681 gchar *buf, *addr, *p;
686 msgFile = fopen( fileName, "rb" );
688 /* Cannot open file */
689 retVal = MGU_OPEN_FILE;
695 list = addrharvest_get_header( msgFile, listHdr, &done );
/* presumably joins the header's lines into one string - verify in mgutils */
702 buf = mgu_list_coalesce( list );
703 mgu_free_list( list );
/* The first ':' is located in the joined header text */
705 if(( p = strchr( buf, ':' ) ) != NULL ) {
709 entry = addrharvest_find( harvester, buf );
710 if( entry && entry->selected ) {
711 /* Sanitize control characters */
714 if( *p == '\r' || *p == '\n' || *p == '\t' )
718 addrharvest_parse_address(
719 harvester, entry, cache, addr );
730 * Read all files in specified directory into address book. Directories are
731 * traversed recursively if necessary.
732 * Enter: harvester Harvester object.
733 * cache Address cache to load.
734 * listHdr List of header names of interest.
735 * dir Directory to process.
/*
 * Harvest every message file in a directory. Regular files whose names
 * convert to a non-negative number are read with addrharvest_readfile().
 * Sub-directories are processed recursively when folderRecurse is set.
 */
737 static void addrharvest_harvest_dir(
738 AddressHarvester *harvester, AddressCache *cache, GList *listHdr,
746 if( ( dp = opendir( dir ) ) == NULL ) {
750 /* Process directory */
752 while( ( d = readdir( dp ) ) != NULL ) {
/* NOTE(review): the result of stat() is not checked; if it fails, s keeps
 * whatever it held before. The name is also used as given by readdir(),
 * so this relies on the working directory - confirm. */
753 stat( d->d_name, &s );
754 if( S_ISDIR( s.st_mode ) ) {
755 if( harvester->folderRecurse ) {
/* NOTE(review): strstr() makes this a substring test, so any name that is
 * a fragment of the DIR_IGNORE text is ignored as well. */
756 if( strstr( DIR_IGNORE, d->d_name ) != NULL )
758 addrharvest_harvest_dir(
759 harvester, cache, listHdr, d->d_name );
762 if( S_ISREG( s.st_mode ) ) {
/* Only files with numeric names are treated as messages */
763 if( ( num = to_number( d->d_name ) ) >= 0 ) {
764 addrharvest_readfile(
765 harvester, d->d_name, cache, listHdr );
774 * Read list of files in specified directory into address book.
775 * Enter: harvester Harvester object.
776 * cache Address cache to load.
777 * msgList List of message numbers, or NULL to process folder.
/*
 * Harvest an explicit list of messages from the harvester's folder. Each
 * list item carries a message number, which is formatted as a file name
 * and read with addrharvest_readfile().
 */
779 static void addrharvest_harvest_list(
780 AddressHarvester *harvester, AddressCache *cache, GList *listHdr,
786 gchar msgNum[ MSGNUM_BUFFSIZE ];
788 if( ( dp = opendir( harvester->path ) ) == NULL ) {
792 /* Process message list */
/* NOTE(review): the result of chdir() is not checked - confirm that a
 * failure here cannot lead to reading files from the wrong directory. */
793 chdir( harvester->path );
/* The message number is stored directly in the list item's data pointer */
796 num = GPOINTER_TO_UINT( node->data );
/* NOTE(review): the value comes from GPOINTER_TO_UINT but is printed with
 * %d; the declared type of num is not visible here - confirm they agree. */
797 sprintf( msgNum, "%d", num );
798 addrharvest_readfile( harvester, msgNum, cache, listHdr );
799 node = g_list_next( node );
805 * ============================================================================
806 * Read all files in specified directory into address book.
807 * Enter: harvester Harvester object.
808 * cache Address cache to load.
809 * msgList List of message numbers, or NULL to process folder.
811 * ============================================================================
/*
 * Public entry point: clear the cache, collect the names of the selected
 * headers, then harvest either the whole folder (msgList is NULL) or only
 * the listed messages. MGU_BAD_ARGS is returned when harvester, cache or
 * harvester->path is NULL.
 */
813 gint addrharvest_harvest(
814 AddressHarvester *harvester, AddressCache *cache, GList *msgList )
820 retVal = MGU_BAD_ARGS;
821 g_return_val_if_fail( harvester != NULL, retVal );
822 g_return_val_if_fail( cache != NULL, retVal );
823 g_return_val_if_fail( harvester->path != NULL, retVal );
/* Start from an empty cache, flagged as not yet read */
826 addrcache_clear( cache );
827 cache->dataRead = FALSE;
829 /* Build list of headers of interest */
831 node = harvester->headerTable;
836 if( entry->selected ) {
/* The list holds its own copy of each selected header name */
839 p = g_strdup( entry->header );
841 listHdr = g_list_append( listHdr, p );
843 node = g_list_next( node );
846 /* Process directory/files */
847 if( msgList == NULL ) {
848 addrharvest_harvest_dir( harvester, cache, listHdr, harvester->path );
851 addrharvest_harvest_list( harvester, cache, listHdr, msgList );
853 mgu_free_dlist( listHdr );
/* Flag the cache as loaded and unmodified */
856 cache->modified = FALSE;
857 cache->dataRead = TRUE;
863 * ============================================================================
864 * Test whether any headers have been selected for processing.
865 * Enter: harvester Harvester object.
866 * Return: TRUE if a header was selected, FALSE if none were selected.
867 * ============================================================================
/*
 * Scan the header table and return TRUE as soon as an entry marked as
 * selected is found. The fallback value retVal (used when harvester is
 * NULL) is assigned on a line not visible in this view.
 */
869 gboolean addrharvest_check_header( AddressHarvester *harvester ) {
874 g_return_val_if_fail( harvester != NULL, retVal );
876 node = harvester->headerTable;
880 entry = ( HeaderEntry * ) node->data;
881 if( entry->selected ) return TRUE;
882 node = g_list_next( node );
888 * ============================================================================
890 * ============================================================================