2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 2002 Match Grun
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 * Functions for an E-Mail address harvester.
22 * Code still needs some work. Address parsing not strictly correct.
32 #include "addrharvest.h"
35 /* Mail header names of interest */
36 static gchar *_headerFrom_ = HEADER_FROM;
37 static gchar *_headerReplyTo_ = HEADER_REPLY_TO;
38 static gchar *_headerSender_ = HEADER_SENDER;
39 static gchar *_headerErrorsTo_ = HEADER_ERRORS_TO;
40 static gchar *_headerCC_ = HEADER_CC;
41 static gchar *_headerTo_ = HEADER_TO;
43 #define ADDR_BUFFSIZE 1024
44 #define MSG_BUFFSIZE 2048
45 #define MSGNUM_BUFFSIZE 32
46 #define DFL_FOLDER_SIZE 20
48 /* Noise strings included by some other E-Mail clients */
49 #define REM_NAME_STRING "(E-mail)"
50 #define REM_NAME_STRING2 "(E-mail 2)"
55 typedef struct _HeaderEntry HeaderEntry;
64 * Build header table entry.
65 * Enter: harvester Harvester object.
68 static void *addrharvest_build_entry(
69 AddressHarvester* harvester, gchar *name )
73 entry = g_new0( HeaderEntry, 1 );
75 entry->selected = FALSE;
78 harvester->headerTable = g_list_append( harvester->headerTable, entry );
81 static void addrharvest_print_hdrentry( HeaderEntry *entry, FILE *stream ) {
82 fprintf( stream, "Header Entry\n" );
83 fprintf( stream, " name : %s\n", entry->header );
84 fprintf( stream, "selected : %s\n", entry->selected ? "yes" : "no" );
90 static gint addrharvest_free_table_vis( gpointer key, gpointer value, gpointer data ) {
100 static void addrharvest_free_table( AddressHarvester* harvester ) {
104 /* Free header list */
105 node = harvester->headerTable;
107 entry = ( HeaderEntry * ) node->data;
108 entry->header = NULL;
109 entry->selected = FALSE;
110 entry->folder = NULL;
113 node = g_list_next( node );
115 g_list_free( harvester->headerTable );
116 harvester->headerTable = NULL;
118 /* Free duplicate table */
119 g_hash_table_freeze( harvester->dupTable );
120 g_hash_table_foreach_remove( harvester->dupTable, addrharvest_free_table_vis, NULL );
121 g_hash_table_thaw( harvester->dupTable );
122 g_hash_table_destroy( harvester->dupTable );
123 harvester->dupTable = NULL;
130 AddressHarvester *addrharvest_create( void ) {
131 AddressHarvester *harvester;
133 harvester = g_new0( AddressHarvester, 1 );
134 harvester->path = NULL;
135 harvester->dupTable = g_hash_table_new( g_str_hash, g_str_equal );
136 harvester->folderSize = DFL_FOLDER_SIZE;
137 harvester->retVal = MGU_SUCCESS;
139 /* Build header table */
140 harvester->headerTable = NULL;
141 addrharvest_build_entry( harvester, _headerFrom_ );
142 addrharvest_build_entry( harvester, _headerReplyTo_ );
143 addrharvest_build_entry( harvester, _headerSender_ );
144 addrharvest_build_entry( harvester, _headerErrorsTo_ );
145 addrharvest_build_entry( harvester, _headerCC_ );
146 addrharvest_build_entry( harvester, _headerTo_ );
155 * Specify path to folder that will be harvested.
156 * Entry: harvester Harvester object.
157 * value Full directory path.
159 void addrharvest_set_path( AddressHarvester* harvester, const gchar *value ) {
160 g_return_if_fail( harvester != NULL );
161 harvester->path = mgu_replace_string( harvester->path, value );
162 g_strstrip( harvester->path );
166 * Specify maximum folder size.
167 * Entry: harvester Harvester object.
170 void addrharvest_set_folder_size(
171 AddressHarvester* harvester, const gint value )
173 g_return_if_fail( harvester != NULL );
175 harvester->folderSize = value;
180 * Search (case insensitive) for header entry with specified name.
181 * Enter: harvester Harvester.
183 * Return: Header, or NULL if not found.
185 static HeaderEntry *addrharvest_find(
186 AddressHarvester* harvester, const gchar *name ) {
191 node = harvester->headerTable;
196 if( g_strcasecmp( entry->header, name ) == 0 ) {
200 node = g_list_next( node );
206 * Set selection for specified heaader.
207 * Enter: harvester Harvester.
209 * value Value to set.
211 void addrharvest_set_header(
212 AddressHarvester* harvester, const gchar *name, const gboolean value )
216 g_return_if_fail( harvester != NULL );
217 entry = addrharvest_find( harvester, name );
218 if( entry != NULL ) {
219 entry->selected = value;
225 * Enter: harvester Harvester.
227 * Return: Address count, or -1 if header not found.
229 gint addrharvest_get_count( AddressHarvester* harvester, const gchar *name ) {
234 g_return_val_if_fail( harvester != NULL, count );
235 entry = addrharvest_find( harvester, name );
236 if( entry != NULL ) {
237 count = entry->count;
243 * Free up object by releasing internal memory.
244 * Enter: harvester Harvester.
246 void addrharvest_free( AddressHarvester *harvester ) {
247 g_return_if_fail( harvester != NULL );
249 /* Free internal stuff */
250 addrharvest_free_table( harvester );
251 g_free( harvester->path );
254 harvester->path = NULL;
255 harvester->retVal = MGU_SUCCESS;
256 harvester->headerTable = NULL;
258 harvester->folderSize = 0;
260 /* Now release object */
265 * Display object to specified stream.
266 * Enter: harvester Harvester.
267 * stream Output stream.
269 void addrharvest_print( AddressHarvester *harvester, FILE *stream ) {
273 g_return_if_fail( harvester != NULL );
274 fprintf( stream, "Address Harvester:\n" );
275 fprintf( stream, " file path: '%s'\n", harvester->path );
276 fprintf( stream, "max folder: %d'\n", harvester->folderSize );
278 node = harvester->headerTable;
281 fprintf( stream, " header: %s", entry->header );
282 fprintf( stream, "\t: %s", entry->selected ? "yes" : "no" );
283 fprintf( stream, "\t: %d\n", entry->count );
284 node = g_list_next( node );
286 fprintf( stream, " ret val: %d\n", harvester->retVal );
290 * Insert address into cache.
291 * Enter: harvester Harvester object.
292 * entry Header object.
293 * cache Address cache to load.
295 * address eMail address.
297 static void addrharvest_insert_cache(
298 AddressHarvester *harvester, HeaderEntry *entry,
299 AddressCache *cache, const gchar *name,
300 const gchar *address )
310 folder = entry->folder;
311 if( folder == NULL ) {
312 newFolder = TRUE; /* No folder yet */
314 if( entry->count % harvester->folderSize == 0 ) {
315 newFolder = TRUE; /* Folder is full */
319 key = g_strdup( address );
321 person = g_hash_table_lookup( harvester->dupTable, key );
323 /* Update existing person to use longest name */
324 value = ADDRITEM_NAME(person);
325 if( strlen( name ) > strlen( value ) ) {
326 addritem_person_set_common_name( person, name );
331 /* Folder if required */
333 cnt = 1 + ( entry->count / harvester->folderSize );
334 folderName =g_strdup_printf( "%s (%d)",
335 entry->header, cnt );
336 folder = addritem_create_item_folder();
337 addritem_folder_set_name( folder, folderName );
338 addritem_folder_set_remarks( folder, "" );
339 addrcache_id_folder( cache, folder );
340 addrcache_add_folder( cache, folder );
341 entry->folder = folder;
342 g_free( folderName );
346 person = addrcache_add_contact(
347 cache, folder, name, address, "" );
348 g_hash_table_insert( harvester->dupTable, key, person );
351 addritem_parse_first_last( person );
/*
 * Remove every occurrence (case insensitive) of specified string from name.
 * Enter: name Name to process; modified in place.
 *        str  String to remove. An empty string removes nothing.
 */
static void addrharvest_del_email( gchar *name, gchar *str ) {
	gchar *p;
	size_t lenr;

	lenr = strlen( str );

	/* An empty pattern matches at every position and would loop forever */
	if( lenr == 0 ) return;

	while( ( p = strcasestr( name, str ) ) != NULL ) {
		/*
		 * Close the gap. The length is taken from the tail that follows
		 * the match, plus one so the terminator moves with it; nothing
		 * beyond the end of the string is read.
		 */
		memmove( p, p + lenr, strlen( p + lenr ) + 1 );
	}
}
371 * Find position of at (@) character in buffer.
372 * Enter: buffer Start of buffer.
373 * Return: Position of at character, or NULL if not found.
374 * Note: This function searches for the last occurrence of an 'at' character
375 * prior to a valid delimiter character for the end of address. This enables
376 * an address to be found where it is also used as the name of the
377 * recipient. For example:
378 * "axle.rose@netscape.com" <axle.rose@netscape.com>
379 * The last occurrence of the at character is detected.
381 static gchar *addrharvest_find_at( const gchar *buffer ) {
385 atCh = strchr( buffer, '@' );
387 /* Search forward for another one */
410 * Find start and end of address string.
411 * Enter: buf Start address of buffer to process (not modified).
412 * atp Pointer to email at (@) character.
413 * bp Pointer to start of email address (returned).
414 * ep Pointer to end of email address (returned).
416 static void addrharvest_find_address(
417 const gchar *buf, const gchar *atp, const gchar **bp,
422 /* Find first non-separator char */
426 if( strchr( ",; \n\r", *p ) == NULL ) break;
431 /* Search forward for end of address */
435 if( strchr( ",;", *p ) ) break;
442 * Extract E-Mail address from buffer. If found, address is removed from
444 * Enter: buffer Address buffer.
445 * Return: E-Mail address, or NULL if none found. Must g_free() when done.
447 static gchar *addrharvest_extract_address( gchar *buffer ) {
449 gchar *atCh, *p, *bp, *ep;
453 atCh = addrharvest_find_at( buffer );
455 /* Search back for start of address */
458 while( p >= buffer ) {
468 /* Search fwd for end */
476 else if( *p == ' ' ) {
487 addr = g_strndup( bp, len + 1 );
488 memmove( bp, ep, len );
497 * Parse address from header buffer creating address in cache.
498 * Enter: harvester Harvester object.
499 * entry Header object.
500 * cache Address cache to load.
501 * hdrBuf Pointer to header buffer.
503 static void addrharvest_parse_address(
504 AddressHarvester *harvester, HeaderEntry *entry,
505 AddressCache *cache, const gchar *hdrBuf )
507 gchar buffer[ ADDR_BUFFSIZE + 2 ];
510 gchar *atCh, *email, *name;
513 /* Search for an address */
514 while( atCh = addrharvest_find_at( hdrBuf ) ) {
515 /* Find addres string */
516 addrharvest_find_address( hdrBuf, atCh, &bp, &ep );
518 /* Copy into buffer */
519 bufLen = ( size_t ) ( ep - bp );
520 if( bufLen > ADDR_BUFFSIZE ) {
521 bufLen = ADDR_BUFFSIZE;
523 strncpy( buffer, bp, bufLen );
524 buffer[ bufLen ] = '\0';
525 buffer[ bufLen + 1 ] = '\0';
526 buffer[ bufLen + 2 ] = '\0';
528 /* Extract address from buffer */
529 email = addrharvest_extract_address( buffer );
531 /* Unescape characters */
532 mgu_str_unescape( buffer );
534 /* Remove noise characaters */
535 addrharvest_del_email( buffer, REM_NAME_STRING );
536 addrharvest_del_email( buffer, REM_NAME_STRING2 );
538 /* Remove leading trailing quotes and spaces */
539 mgu_str_ltc2space( buffer, '\"', '\"' );
540 mgu_str_ltc2space( buffer, '\'', '\'' );
541 mgu_str_ltc2space( buffer, '\"', '\"' );
542 mgu_str_ltc2space( buffer, '(', ')' );
543 g_strstrip( buffer );
545 if( g_strcasecmp( buffer, email ) == 0 ) {
552 /* Insert into address book */
553 addrharvest_insert_cache(
554 harvester, entry, cache, name, email );
562 * Test whether buffer contains a header that appears in header list.
563 * Enter: listHdr Header list.
565 * Return: TRUE if header in list.
567 static gboolean addrharvest_check_hdr( GList *listHdr, gchar *buf ) {
570 gchar *p, *hdr, *nhdr;
574 p = strchr( buf, ':' );
576 len = ( size_t ) ( p - buf );
577 hdr = g_strndup( buf, len );
581 if( g_strcasecmp( nhdr, hdr ) == 0 ) {
585 node = g_list_next( node );
593 * Read header into a linked list of lines.
594 * Enter: fp File to read.
595 * listHdr List of header lines of interest.
596 * done End of headers or end of file reached.
597 * Return: Linked list of lines.
599 static GSList *addrharvest_get_header( FILE *fp, GList *listHdr, gboolean *done ) {
601 gchar buf[ MSG_BUFFSIZE + 2 ];
608 if( fgets( buf, MSG_BUFFSIZE, fp ) == NULL ) {
613 /* Test for end of headers */
614 if( buf[0] == '\r' || buf[0] == '\n' ) {
619 /* Test whether required header */
620 foundHdr = addrharvest_check_hdr( listHdr, buf );
622 /* Read all header lines. Only add reqd ones to list */
628 list = g_slist_append( list, p );
631 /* Read first character */
633 if( ch == ' ' || ch == '\t' ) {
634 /* Continuation character - read into buffer */
635 if( fgets( buf, MSG_BUFFSIZE, fp ) == NULL ) {
644 /* Push back character for next header */
655 * Read specified file into address book.
656 * Enter: harvester Harvester object.
657 * fileName File to read.
658 * cache Address cache to load.
661 static gint addrharvest_readfile(
662 AddressHarvester *harvester, const gchar *fileName,
663 AddressCache *cache, GList *listHdr )
667 gchar *buf, *addr, *p;
672 msgFile = fopen( fileName, "rb" );
674 /* Cannot open file */
675 retVal = MGU_OPEN_FILE;
681 list = addrharvest_get_header( msgFile, listHdr, &done );
688 buf = mgu_list_coalesce( list );
689 mgu_free_list( list );
691 if(( p = strchr( buf, ':' ) ) != NULL ) {
695 entry = addrharvest_find( harvester, buf );
696 if( entry && entry->selected ) {
697 /* Sanitize control characters */
700 if( *p == '\r' || *p == '\n' || *p == '\t' )
704 addrharvest_parse_address(
705 harvester, entry, cache, addr );
716 * ============================================================================
717 * Read all files in specified directory into address book.
718 * Enter: harvester Harvester object.
719 * cache Address cache to load.
720 * msgList List of message numbers, or NULL to process folder.
722 * ============================================================================
724 gint addrharvest_harvest(
725 AddressHarvester *harvester, AddressCache *cache, GList *msgList )
734 gchar msgNum[ MSGNUM_BUFFSIZE ];
736 retVal = MGU_BAD_ARGS;
737 g_return_val_if_fail( harvester != NULL, retVal );
738 g_return_val_if_fail( cache != NULL, retVal );
739 g_return_val_if_fail( harvester->path != NULL, retVal );
742 addrcache_clear( cache );
743 cache->dataRead = FALSE;
745 if( chdir( harvester->path ) < 0 ) {
746 /* printf( "Error changing dir\n" ); */
750 if( ( dp = opendir( harvester->path ) ) == NULL ) {
751 /* printf( "Error opening dir\n" ); */
755 /* Build list of headers of interest */
757 node = harvester->headerTable;
762 if( entry->selected ) {
765 p = g_strdup( entry->header );
767 listHdr = g_list_append( listHdr, p );
769 node = g_list_next( node );
772 if( msgList == NULL ) {
773 /* Process directory */
774 while( ( d = readdir( dp ) ) != NULL ) {
775 stat( d->d_name, &s );
776 if( S_ISREG( s.st_mode ) ) {
777 if( ( num = to_number( d->d_name ) ) >= 0 ) {
778 addrharvest_readfile(
779 harvester, d->d_name, cache, listHdr );
785 /* Process message list */
788 num = GPOINTER_TO_UINT( node->data );
789 sprintf( msgNum, "%d", num );
790 addrharvest_readfile(
791 harvester, msgNum, cache, listHdr );
792 node = g_list_next( node );
795 mgu_free_dlist( listHdr );
800 cache->modified = FALSE;
801 cache->dataRead = TRUE;
807 * ============================================================================
808 * Test whether any headers have been selected for processing.
809 * Enter: harvester Harvester object.
810 * Return: TRUE if a header was selected, FALSE if none were selected.
811 * ============================================================================
813 gboolean addrharvest_check_header( AddressHarvester *harvester ) {
818 g_return_val_if_fail( harvester != NULL, retVal );
820 node = harvester->headerTable;
824 entry = ( HeaderEntry * ) node->data;
825 if( entry->selected ) return TRUE;
826 node = g_list_next( node );
832 * ============================================================================
834 * ============================================================================