2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 2002 Match Grun
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 * Functions for an E-Mail address harvester.
22 * Code still needs some work. Address parsing not strictly correct.
32 #include "addrharvest.h"
35 /* Mail header names of interest */
36 static gchar *_headerFrom_ = HEADER_FROM;
37 static gchar *_headerReplyTo_ = HEADER_REPLY_TO;
38 static gchar *_headerSender_ = HEADER_SENDER;
39 static gchar *_headerErrorsTo_ = HEADER_ERRORS_TO;
40 static gchar *_headerCC_ = HEADER_CC;
41 static gchar *_headerTo_ = HEADER_TO;
43 static gchar *_emptyString_ = "";
45 #define MSG_BUFFSIZE 8192
46 #define DFL_FOLDER_SIZE 20
51 typedef struct _HeaderEntry HeaderEntry;
60 * Build header table entry.
61 * Enter: harvester Harvester object.
64 static void *addrharvest_build_entry(
65 AddressHarvester* harvester, gchar *name )
69 entry = g_new0( HeaderEntry, 1 );
71 entry->selected = FALSE;
74 harvester->headerTable = g_list_append( harvester->headerTable, entry );
77 static void addrharvest_print_hdrentry( HeaderEntry *entry, FILE *stream ) {
78 fprintf( stream, "Header Entry\n" );
79 fprintf( stream, " name : %s\n", entry->header );
80 fprintf( stream, "selected : %s\n", entry->selected ? "yes" : "no" );
86 static gint addrharvest_free_table_vis( gpointer key, gpointer value, gpointer data ) {
96 static void addrharvest_free_table( AddressHarvester* harvester ) {
100 /* Free header list */
101 node = harvester->headerTable;
103 entry = ( HeaderEntry * ) node->data;
104 entry->header = NULL;
105 entry->selected = FALSE;
106 entry->folder = NULL;
109 node = g_list_next( node );
111 g_list_free( harvester->headerTable );
112 harvester->headerTable = NULL;
114 /* Free duplicate table */
115 g_hash_table_freeze( harvester->dupTable );
116 g_hash_table_foreach_remove( harvester->dupTable, addrharvest_free_table_vis, NULL );
117 g_hash_table_thaw( harvester->dupTable );
118 g_hash_table_destroy( harvester->dupTable );
119 harvester->dupTable = NULL;
126 AddressHarvester *addrharvest_create( void ) {
127 AddressHarvester *harvester;
129 harvester = g_new0( AddressHarvester, 1 );
130 harvester->path = NULL;
131 harvester->bufptr = harvester->buffer;
132 harvester->dupTable = g_hash_table_new( g_str_hash, g_str_equal );
133 harvester->folderSize = DFL_FOLDER_SIZE;
134 harvester->retVal = MGU_SUCCESS;
136 /* Build header table */
137 harvester->headerTable = NULL;
138 addrharvest_build_entry( harvester, _headerFrom_ );
139 addrharvest_build_entry( harvester, _headerReplyTo_ );
140 addrharvest_build_entry( harvester, _headerSender_ );
141 addrharvest_build_entry( harvester, _headerErrorsTo_ );
142 addrharvest_build_entry( harvester, _headerCC_ );
143 addrharvest_build_entry( harvester, _headerTo_ );
152 * Specify path to folder that will be harvested.
153 * Entry: harvester Harvester object.
154 * value Full directory path.
156 void addrharvest_set_path( AddressHarvester* harvester, const gchar *value ) {
157 g_return_if_fail( harvester != NULL );
158 harvester->path = mgu_replace_string( harvester->path, value );
159 g_strstrip( harvester->path );
163 * Specify maximum folder size.
164 * Entry: harvester Harvester object.
167 void addrharvest_set_folder_size( AddressHarvester* harvester, const gint value ) {
168 g_return_if_fail( harvester != NULL );
170 harvester->folderSize = value;
175 * Search (case insensitive) for header entry with specified name.
176 * Enter: harvester Harvester.
178 * Return: Header, or NULL if not found.
180 static HeaderEntry *addrharvest_find(
181 AddressHarvester* harvester, const gchar *name ) {
186 node = harvester->headerTable;
191 if( g_strcasecmp( entry->header, name ) == 0 ) {
195 node = g_list_next( node );
201 * Set selection for specified header.
202 * Enter: harvester Harvester.
204 * value Value to set.
206 void addrharvest_set_header(
207 AddressHarvester* harvester, const gchar *name, const gboolean value )
211 g_return_if_fail( harvester != NULL );
212 entry = addrharvest_find( harvester, name );
213 if( entry != NULL ) {
214 entry->selected = value;
220 * Enter: harvester Harvester.
222 * Return: Address count, or -1 if header not found.
224 gint addrharvest_get_count(
225 AddressHarvester* harvester, const gchar *name )
231 g_return_val_if_fail( harvester != NULL, count );
232 entry = addrharvest_find( harvester, name );
233 if( entry != NULL ) {
234 count = entry->count;
240 * Free up object by releasing internal memory.
241 * Enter: harvester Harvester.
243 void addrharvest_free( AddressHarvester *harvester ) {
244 g_return_if_fail( harvester != NULL );
246 /* Free internal stuff */
247 addrharvest_free_table( harvester );
248 g_free( harvester->path );
251 harvester->path = NULL;
252 harvester->retVal = MGU_SUCCESS;
253 harvester->headerTable = NULL;
255 harvester->folderSize = 0;
257 /* Now release object */
262 * Display object to specified stream.
263 * Enter: harvester Harvester.
264 * stream Output stream.
266 void addrharvest_print( AddressHarvester *harvester, FILE *stream ) {
270 g_return_if_fail( harvester != NULL );
271 fprintf( stream, "Address Harvester:\n" );
272 fprintf( stream, " file path: '%s'\n", harvester->path );
273 fprintf( stream, "max folder: %d'\n", harvester->folderSize );
275 node = harvester->headerTable;
278 fprintf( stream, " header: %s", entry->header );
279 fprintf( stream, "\t: %s", entry->selected ? "yes" : "no" );
280 fprintf( stream, "\t: %d\n", entry->count );
281 node = g_list_next( node );
283 fprintf( stream, " ret val: %d\n", harvester->retVal );
287 gint to_number(const gchar *nstr) {
288 register const gchar *p;
289 if (*nstr == '\0') return -1;
290 for( p = nstr; *p != '\0'; p++ )
291 if (!isdigit(*p)) return -1;
297 * Replace leading and trailing characters (quotes) in input string
298 * with spaces. Only matching non-blank characters that appear at both
299 * start and end of string are replaced. Control characters are also
300 * replaced with spaces.
301 * Enter: str String to process.
302 * ch Character to remove.
304 static void addrutil_strip_char( gchar *str, gchar ch ) {
308 /* Search forwards for first non-space match */
310 ae = -1 + str + strlen( str );
314 /* Search backwards from end for match */
325 else if( *ae == 127 ) {
338 else if( *as == 127 ) {
351 * Remove backslash character from input string.
352 * Enter: str String to process.
354 static void addrutil_unescape( gchar *str ) {
361 ilen = strlen( p + 1 );
362 memmove( p, p + 1, ilen );
369 * Parse name from email address string.
370 * Enter: buf Start address of buffer to process (not modified).
371 * atp Pointer to email at (@) character.
372 * ap Pointer to start of email address returned.
373 * ep Pointer to end of email address returned.
374 * Return: Parsed name or NULL if not present. This should be g_free'd
377 static gchar *addrutil_parse_name(
378 const gchar *buf, const gchar *atp, const gchar **ap,
391 /* Find first non-separator char */
394 if( strchr( ",; \n\r", *bp ) == NULL ) break;
398 /* Search back for start of name */
404 /* Found start of address/end of name part */
405 ilen = -1 + ( size_t ) ( pos - bp );
406 name = g_strndup( bp, ilen + 1 );
407 *(name + ilen + 1) = '\0';
409 /* Remove leading and trailing quotes and spaces */
410 addrutil_strip_char( name, '\"' );
411 addrutil_strip_char( name, '\'' );
412 addrutil_strip_char( name, '\"' );
413 addrutil_unescape( name );
421 /* Search forward for end of address */
428 if( strchr( ",; \'\n\r", *pos ) ) break;
438 * Insert address into cache.
439 * Enter: harvester Harvester object.
440 * entry Header object.
441 * cache Address cache to load.
443 * address eMail address.
444 * Return: Person inserted.
446 static ItemPerson *addrharvest_insert_cache(
447 AddressHarvester *harvester, HeaderEntry *entry,
448 AddressCache *cache, const gchar *name,
449 const gchar *address )
458 folder = entry->folder;
459 if( folder == NULL ) {
460 newFolder = TRUE; /* No folder yet */
462 if( entry->count % harvester->folderSize == 0 ) {
463 newFolder = TRUE; /* Folder is full */
467 cnt = 1 + ( entry->count / harvester->folderSize );
468 folderName = g_strdup_printf( "%s (%d)", entry->header, cnt );
469 folder = addritem_create_item_folder();
470 addritem_folder_set_name( folder, folderName );
471 addritem_folder_set_remarks( folder, "" );
472 addrcache_id_folder( cache, folder );
473 addrcache_add_folder( cache, folder );
474 entry->folder = folder;
475 g_free( folderName );
478 person = addrcache_add_contact( cache, folder, name, address, "" );
486 * Parse address from header buffer creating address in cache.
487 * Enter: harvester Harvester object.
488 * entry Header object.
489 * cache Address cache to load.
490 * hdrBuf Pointer to header buffer.
492 static void addrharvest_parse_address(
493 AddressHarvester *harvester, HeaderEntry *entry,
494 AddressCache *cache, const gchar *hdrBuf )
496 gchar addr[ MSG_BUFFSIZE ];
506 /* printf( "hdrBuf :%s:\n", hdrBuf ); */
507 /* Search for an address */
508 while( atCh = strcasestr( hdrBuf, ATCHAR ) ) {
509 name = addrutil_parse_name( hdrBuf, atCh, &bp, &ep );
510 addrLen = ( size_t ) ( ep - bp );
511 strncpy( addr, bp, addrLen );
512 addr[ addrLen ] = '\0';
513 extract_address( addr );
514 /* printf( "name/addr :%s:\t:%s:\n", addr, name ); */
519 if( strlen( addr ) > 0 ) {
521 name = g_strdup( _emptyString_ );
524 /* printf( "name/addr :%s:\t:%s:\n", addr, name ); */
525 person = g_hash_table_lookup(
526 harvester->dupTable, addr );
528 /* Use longest name */
529 value = ADDRITEM_NAME(person);
530 if( strlen( name ) > strlen( value ) ) {
531 addritem_person_set_common_name(
537 key = g_strdup( addr );
538 person = addrharvest_insert_cache(
539 harvester, entry, cache, name, addr );
541 harvester->dupTable, key, person );
549 * Read specified file into address book.
550 * Enter: harvester Harvester object.
551 * fileName File to read.
552 * cache Address cache to load.
555 static gint addrharvest_readfile(
556 AddressHarvester *harvester, const gchar *fileName,
557 AddressCache *cache )
561 gchar buf[ MSG_BUFFSIZE ], tmp[ MSG_BUFFSIZE ];
564 msgFile = fopen( fileName, "r" );
566 /* Cannot open file */
567 retVal = MGU_OPEN_FILE;
575 val = procheader_get_one_field( buf, sizeof(buf), msgFile, NULL );
579 conv_unmime_header( tmp, sizeof(tmp), buf, NULL );
580 if(( p = strchr( tmp, ':' ) ) != NULL ) {
585 entry = addrharvest_find( harvester, tmp );
586 if( entry && entry->selected ) {
587 addrharvest_parse_address(
588 harvester, entry, cache, hdr );
600 * ============================================================================
601 * Read all files in specified directory into address book.
602 * Enter: harvester Harvester object.
603 * cache Address cache to load.
605 * ============================================================================
607 gint addrharvest_harvest( AddressHarvester *harvester, AddressCache *cache ) {
614 retVal = MGU_BAD_ARGS;
615 g_return_val_if_fail( harvester != NULL, retVal );
616 g_return_val_if_fail( cache != NULL, retVal );
617 g_return_val_if_fail( harvester->path != NULL, retVal );
620 addrcache_clear( cache );
621 cache->dataRead = FALSE;
623 if( chdir( harvester->path ) < 0 ) {
624 printf( "Error changing dir\n" );
628 if( ( dp = opendir( harvester->path ) ) == NULL ) {
629 printf( "Error opening dir\n" );
633 while( ( d = readdir( dp ) ) != NULL ) {
634 stat( d->d_name, &s );
635 if( S_ISREG( s.st_mode ) ) {
636 if( ( num = to_number( d->d_name ) ) >= 0 ) {
637 addrharvest_readfile( harvester, d->d_name, cache );
645 cache->modified = FALSE;
646 cache->dataRead = TRUE;
652 * ============================================================================
653 * Test whether any headers have been selected for processing.
654 * Enter: harvester Harvester object.
655 * Return: TRUE if a header was selected, FALSE if none were selected.
656 * ============================================================================
658 gboolean addrharvest_check_header( AddressHarvester *harvester ) {
663 g_return_val_if_fail( harvester != NULL, retVal );
665 node = harvester->headerTable;
669 entry = ( HeaderEntry * ) node->data;
670 if( entry->selected ) return TRUE;
671 node = g_list_next( node );
677 * ============================================================================
679 * ============================================================================