2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 2002 Match Grun
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 * Functions for an E-Mail address harvester.
22 * Code still needs some work. Address parsing not strictly correct.
32 #include "addrharvest.h"
/* One header table entry is built from each of these names in addrharvest_create(). */
35 /* Mail header names of interest */
36 static gchar *_headerFrom_ = HEADER_FROM;
37 static gchar *_headerReplyTo_ = HEADER_REPLY_TO;
38 static gchar *_headerSender_ = HEADER_SENDER;
39 static gchar *_headerErrorsTo_ = HEADER_ERRORS_TO;
40 static gchar *_headerCC_ = HEADER_CC;
41 static gchar *_headerTo_ = HEADER_TO;
/* Capacity used for the working buffer in addrharvest_parse_address(). */
43 #define ADDR_BUFFSIZE 1024
/* Size of the two header buffers in addrharvest_readfile(). */
44 #define MSG_BUFFSIZE 16384
/* Folder size assigned to a new harvester by addrharvest_create(). */
45 #define DFL_FOLDER_SIZE 20
47 /* Noise strings included by some other E-Mail clients */
/* Both strings are removed from the name text in addrharvest_parse_address(). */
48 #define REM_NAME_STRING "(E-mail)"
49 #define REM_NAME_STRING2 "(E-mail 2)"
/* Header table record; this file reads and writes its header, selected, folder and count members. */
54 typedef struct _HeaderEntry HeaderEntry;
63 * Build header table entry.
64 * Enter: harvester Harvester object.
67 static void *addrharvest_build_entry(
68 AddressHarvester* harvester, gchar *name )
72 entry = g_new0( HeaderEntry, 1 );
74 entry->selected = FALSE;
77 harvester->headerTable = g_list_append( harvester->headerTable, entry );
80 static void addrharvest_print_hdrentry( HeaderEntry *entry, FILE *stream ) {
81 fprintf( stream, "Header Entry\n" );
82 fprintf( stream, " name : %s\n", entry->header );
83 fprintf( stream, "selected : %s\n", entry->selected ? "yes" : "no" );
/* Visitor handed to g_hash_table_foreach_remove() by addrharvest_free_table() for the duplicate table. */
89 static gint addrharvest_free_table_vis( gpointer key, gpointer value, gpointer data ) {
99 static void addrharvest_free_table( AddressHarvester* harvester ) {
103 /* Free header list */
104 node = harvester->headerTable;
106 entry = ( HeaderEntry * ) node->data;
107 entry->header = NULL;
108 entry->selected = FALSE;
109 entry->folder = NULL;
112 node = g_list_next( node );
114 g_list_free( harvester->headerTable );
115 harvester->headerTable = NULL;
117 /* Free duplicate table */
118 g_hash_table_freeze( harvester->dupTable );
119 g_hash_table_foreach_remove( harvester->dupTable, addrharvest_free_table_vis, NULL );
120 g_hash_table_thaw( harvester->dupTable );
121 g_hash_table_destroy( harvester->dupTable );
122 harvester->dupTable = NULL;
129 AddressHarvester *addrharvest_create( void ) {
130 AddressHarvester *harvester;
132 harvester = g_new0( AddressHarvester, 1 );
133 harvester->path = NULL;
134 harvester->dupTable = g_hash_table_new( g_str_hash, g_str_equal );
135 harvester->folderSize = DFL_FOLDER_SIZE;
136 harvester->retVal = MGU_SUCCESS;
138 /* Build header table */
139 harvester->headerTable = NULL;
140 addrharvest_build_entry( harvester, _headerFrom_ );
141 addrharvest_build_entry( harvester, _headerReplyTo_ );
142 addrharvest_build_entry( harvester, _headerSender_ );
143 addrharvest_build_entry( harvester, _headerErrorsTo_ );
144 addrharvest_build_entry( harvester, _headerCC_ );
145 addrharvest_build_entry( harvester, _headerTo_ );
154 * Specify path to folder that will be harvested.
155 * Entry: harvester Harvester object.
156 * value Full directory path.
158 void addrharvest_set_path( AddressHarvester* harvester, const gchar *value ) {
159 g_return_if_fail( harvester != NULL );
160 harvester->path = mgu_replace_string( harvester->path, value );
161 g_strstrip( harvester->path );
165 * Specify maximum folder size.
166 * Entry: harvester Harvester object.
169 void addrharvest_set_folder_size(
170 AddressHarvester* harvester, const gint value )
172 g_return_if_fail( harvester != NULL );
174 harvester->folderSize = value;
179 * Search (case insensitive) for header entry with specified name.
180 * Enter: harvester Harvester.
182 * Return: Header, or NULL if not found.
184 static HeaderEntry *addrharvest_find(
185 AddressHarvester* harvester, const gchar *name ) {
190 node = harvester->headerTable;
195 if( g_strcasecmp( entry->header, name ) == 0 ) {
199 node = g_list_next( node );
205 * Set selection for specified heaader.
206 * Enter: harvester Harvester.
208 * value Value to set.
210 void addrharvest_set_header(
211 AddressHarvester* harvester, const gchar *name, const gboolean value )
215 g_return_if_fail( harvester != NULL );
216 entry = addrharvest_find( harvester, name );
217 if( entry != NULL ) {
218 entry->selected = value;
224 * Enter: harvester Harvester.
226 * Return: Address count, or -1 if header not found.
228 gint addrharvest_get_count( AddressHarvester* harvester, const gchar *name ) {
233 g_return_val_if_fail( harvester != NULL, count );
234 entry = addrharvest_find( harvester, name );
235 if( entry != NULL ) {
236 count = entry->count;
242 * Free up object by releasing internal memory.
243 * Enter: harvester Harvester.
245 void addrharvest_free( AddressHarvester *harvester ) {
246 g_return_if_fail( harvester != NULL );
248 /* Free internal stuff */
249 addrharvest_free_table( harvester );
250 g_free( harvester->path );
253 harvester->path = NULL;
254 harvester->retVal = MGU_SUCCESS;
255 harvester->headerTable = NULL;
257 harvester->folderSize = 0;
259 /* Now release object */
264 * Display object to specified stream.
265 * Enter: harvester Harvester.
266 * stream Output stream.
268 void addrharvest_print( AddressHarvester *harvester, FILE *stream ) {
272 g_return_if_fail( harvester != NULL );
273 fprintf( stream, "Address Harvester:\n" );
274 fprintf( stream, " file path: '%s'\n", harvester->path );
275 fprintf( stream, "max folder: %d'\n", harvester->folderSize );
277 node = harvester->headerTable;
280 fprintf( stream, " header: %s", entry->header );
281 fprintf( stream, "\t: %s", entry->selected ? "yes" : "no" );
282 fprintf( stream, "\t: %d\n", entry->count );
283 node = g_list_next( node );
285 fprintf( stream, " ret val: %d\n", harvester->retVal );
/* Files one harvested name/address pair under the header's current folder, consulting the duplicate table first. */
289 * Insert address into cache.
290 * Enter: harvester Harvester object.
291 * entry Header object.
292 * cache Address cache to load.
294 * address eMail address.
296 static void addrharvest_insert_cache(
297 AddressHarvester *harvester, HeaderEntry *entry,
298 AddressCache *cache, const gchar *name,
299 const gchar *address )
309 folder = entry->folder;
310 if( folder == NULL ) {
311 newFolder = TRUE; /* No folder yet */
/* folderSize is the divisor here and below. NOTE(review): it must be non-zero - confirm no caller can store 0. */
313 if( entry->count % harvester->folderSize == 0 ) {
314 newFolder = TRUE; /* Folder is full */
/* The new folder is named "<header> (<n>)", where n is derived from the entry's count. */
318 cnt = 1 + ( entry->count / harvester->folderSize );
319 folderName = g_strdup_printf( "%s (%d)", entry->header, cnt );
320 folder = addritem_create_item_folder();
321 addritem_folder_set_name( folder, folderName );
322 addritem_folder_set_remarks( folder, "" );
323 addrcache_id_folder( cache, folder );
324 addrcache_add_folder( cache, folder );
325 entry->folder = folder;
326 g_free( folderName );
/* A private copy of the address serves as the duplicate table key. */
330 key = g_strdup( address );
332 person = g_hash_table_lookup( harvester->dupTable, key );
334 /* Use longest name */
335 value = ADDRITEM_NAME(person);
336 if( strlen( name ) > strlen( value ) ) {
337 addritem_person_set_common_name( person, name );
/* Not seen before: add a contact to the folder and record it under the key. */
343 person = addrcache_add_contact(
344 cache, folder, name, address, "" );
345 g_hash_table_insert( harvester->dupTable, key, person );
351 * Remove specified string from name.
353 * em String to remove.
355 static void addrharvest_del_email( gchar *name, gchar *em ) {
360 while( p = strcasestr( name, em ) ) {
361 memmove( p, p + ilen, ilen + 1 );
/* Locates the '@' that belongs to the next address in buffer; see the note below for which occurrence is chosen. */
366 * Find position of at (@) character in buffer.
367 * Enter: buffer Start of buffer.
368 * Return: Position of at character, or NULL if not found.
369 * Note: This function searches for the last occurrence of an 'at' character
370 * prior to a valid delimiter character for the end of address. This enables
371 * an address to be found where it is also used as the name of the
372 * recipient. For example:
373 * "axle.rose@netscape.com" <axle.rose@netscape.com>
374 * The last occurrence of the at character is detected.
376 static gchar *addrharvest_find_at( const gchar *buffer ) {
/* Start from the first '@' in the buffer. */
380 atCh = strchr( buffer, '@' );
382 /* Search forward for another one */
/* Reports, through bp and ep, the span of text around atp that holds one address. */
405 * Find start and end of address string.
406 * Enter: buf Start address of buffer to process (not modified).
407 * atp Pointer to email at (@) character.
408 * bp Pointer to start of email address (returned).
409 * ep Pointer to end of email address (returned).
411 static void addrharvest_find_address(
412 const gchar *buf, const gchar *atp, const gchar **bp,
417 /* Find first non-separator char */
/* Separators are comma, semicolon, space, LF and CR; strchr() also matches the terminating NUL, so '\0' does not break out here. */
421 if( strchr( ",; \n\r", *p ) == NULL ) break;
426 /* Search forward for end of address */
/* Comma or semicolon ends the address; strchr() also matches '\0', so the end of the string breaks out as well. */
430 if( strchr( ",;", *p ) ) break;
/* Pulls the e-mail address out of buffer into a newly allocated string and edits buffer in place. */
437 * Extract E-Mail address from buffer. If found, address is removed from
439 * Enter: buffer Address buffer.
440 * Return: E-Mail address, or NULL if none found. Must g_free() when done.
442 static gchar *addrharvest_extract_address( gchar *buffer ) {
444 gchar *atCh, *p, *bp, *ep;
/* The '@' found here anchors the backward and forward scans below. */
448 atCh = addrharvest_find_at( buffer );
450 /* Search back for start of address */
/* NOTE(review): if p is stepped below buffer, this test relies on a pointer before the array - confirm the loop exits first. */
453 while( p >= buffer ) {
463 /* Search fwd for end */
471 else if( *p == ' ' ) {
/* NOTE(review): the copy takes up to len + 1 characters from bp while the move shifts len bytes from ep - confirm both lengths are intended. */
482 addr = g_strndup( bp, len + 1 );
483 memmove( bp, ep, len );
492 * Parse address from header buffer creating address in cache.
493 * Enter: harvester Harvester object.
494 * entry Header object.
495 * cache Address cache to load.
496 * hdrBuf Pointer to header buffer.
498 static void addrharvest_parse_address(
499 AddressHarvester *harvester, HeaderEntry *entry,
500 AddressCache *cache, const gchar *hdrBuf )
502 gchar buffer[ ADDR_BUFFSIZE + 2 ];
505 gchar *atCh, *email, *p;
508 /* Search for an address */
509 while( atCh = addrharvest_find_at( hdrBuf ) ) {
510 /* Find addres string */
511 addrharvest_find_address( hdrBuf, atCh, &bp, &ep );
513 /* Copy into buffer */
514 bufLen = ( size_t ) ( ep - bp );
515 if( bufLen > ADDR_BUFFSIZE ) {
516 bufLen = ADDR_BUFFSIZE;
518 strncpy( buffer, bp, bufLen );
519 buffer[ bufLen ] = '\0';
520 buffer[ bufLen + 1 ] = '\0';
521 buffer[ bufLen + 2 ] = '\0';
523 /* Make whitespace */
526 if( *p == '\r' || *p == '\n' || *p == '\t' ) *p = ' ';
530 /* Extract address from buffer */
531 email = addrharvest_extract_address( buffer );
533 /* Unescape characters */
534 mgu_str_unescape( buffer );
536 /* Remove noise characaters */
537 addrharvest_del_email( buffer, REM_NAME_STRING );
538 addrharvest_del_email( buffer, REM_NAME_STRING2 );
540 /* Remove leading trailing quotes and spaces */
541 mgu_str_ltc2space( buffer, '\"', '\"' );
542 mgu_str_ltc2space( buffer, '\'', '\'' );
543 mgu_str_ltc2space( buffer, '\"', '\"' );
544 mgu_str_ltc2space( buffer, '(', ')' );
545 g_strstrip( buffer );
547 /* Insert into address book */
548 addrharvest_insert_cache(
549 harvester, entry, cache, buffer, email );
/* Reads header fields from one message file and passes the values of selected headers to addrharvest_parse_address(). */
557 * Read specified file into address book.
558 * Enter: harvester Harvester object.
559 * fileName File to read.
560 * cache Address cache to load.
563 static gint addrharvest_readfile(
564 AddressHarvester *harvester, const gchar *fileName,
565 AddressCache *cache )
/* buf receives a header field as read; tmp receives the output of conv_unmime_header() for it. */
569 gchar buf[ MSG_BUFFSIZE ], tmp[ MSG_BUFFSIZE ];
572 msgFile = fopen( fileName, "rb" );
574 /* Cannot open file */
575 retVal = MGU_OPEN_FILE;
583 val = procheader_get_one_field(
584 buf, sizeof(buf), msgFile, NULL );
588 conv_unmime_header( tmp, sizeof(tmp), buf, NULL );
/* The first ':' separates the header name from its value. */
589 if(( p = strchr( tmp, ':' ) ) != NULL ) {
/* NOTE(review): tmp is used as the header name here, so it is presumably cut at the ':' found above - confirm. The lookup ignores case. */
594 entry = addrharvest_find( harvester, tmp );
/* Headers that are unknown or not selected are skipped. */
595 if( entry && entry->selected ) {
596 addrharvest_parse_address(
597 harvester, entry, cache, hdr );
/* Clears the cache, then feeds matching files found in harvester->path to addrharvest_readfile(). */
607 * ============================================================================
608 * Read all files in specified directory into address book.
609 * Enter: harvester Harvester object.
610 * cache Address cache to load.
612 * ============================================================================
614 gint addrharvest_harvest( AddressHarvester *harvester, AddressCache *cache ) {
/* MGU_BAD_ARGS is what the argument checks below hand back when they fail. */
621 retVal = MGU_BAD_ARGS;
622 g_return_val_if_fail( harvester != NULL, retVal );
623 g_return_val_if_fail( cache != NULL, retVal );
624 g_return_val_if_fail( harvester->path != NULL, retVal );
627 addrcache_clear( cache );
628 cache->dataRead = FALSE;
/* The working directory is changed so that the bare d_name values below resolve for stat() and for reading. */
630 if( chdir( harvester->path ) < 0 ) {
631 /* printf( "Error changing dir\n" ); */
635 if( ( dp = opendir( harvester->path ) ) == NULL ) {
636 /* printf( "Error opening dir\n" ); */
640 while( ( d = readdir( dp ) ) != NULL ) {
/* NOTE(review): the stat() result is not checked; on failure s.st_mode holds whatever was there before. */
641 stat( d->d_name, &s );
642 if( S_ISREG( s.st_mode ) ) {
/* Only regular files for whose name to_number() yields a non-negative value are read. */
643 if( ( num = to_number( d->d_name ) ) >= 0 ) {
/* The value returned by addrharvest_readfile() is not used. */
644 addrharvest_readfile(
645 harvester, d->d_name, cache );
653 cache->modified = FALSE;
654 cache->dataRead = TRUE;
660 * ============================================================================
661 * Test whether any headers have been selected for processing.
662 * Enter: harvester Harvester object.
663 * Return: TRUE if a header was selected, FALSE if none were selected.
664 * ============================================================================
666 gboolean addrharvest_check_header( AddressHarvester *harvester ) {
671 g_return_val_if_fail( harvester != NULL, retVal );
673 node = harvester->headerTable;
677 entry = ( HeaderEntry * ) node->data;
678 if( entry->selected ) return TRUE;
679 node = g_list_next( node );
685 * ============================================================================
687 * ============================================================================