/*
* Functions for an E-Mail address harvester.
- * Code still needs some work. Address parsing not strictly correct.
*/
#include <sys/stat.h>
static gchar *_headerTo_ = HEADER_TO;
#define ADDR_BUFFSIZE 1024
-#define MSG_BUFFSIZE 16384
+#define MSG_BUFFSIZE 2048
+#define MSGNUM_BUFFSIZE 32
#define DFL_FOLDER_SIZE 20
/* Noise strings included by some other E-Mail clients */
#define REM_NAME_STRING "(E-mail)"
#define REM_NAME_STRING2 "(E-mail 2)"
+/* Directories to ignore */
+#define DIR_IGNORE ".\t..\t.sylpheed_mark\t.sylpheed_cache"
+
/*
* Header entry.
*/
* Enter: harvester Harvester object.
* name Header name.
*/
-static void *addrharvest_build_entry(
+static void addrharvest_build_entry(
AddressHarvester* harvester, gchar *name )
{
HeaderEntry *entry;
harvester->headerTable = g_list_append( harvester->headerTable, entry );
}
-static void addrharvest_print_hdrentry( HeaderEntry *entry, FILE *stream ) {
- fprintf( stream, "Header Entry\n" );
- fprintf( stream, " name : %s\n", entry->header );
- fprintf( stream, "selected : %s\n", entry->selected ? "yes" : "no" );
-}
-
/*
* Free key in table.
*/
}
}
+/*
+ * Select whether sub-folders are harvested as well.
+ * Entry: harvester Harvester object; nothing is changed when this is NULL.
+ *        value     TRUE to descend into sub-folders, FALSE to process the
+ *                  folder only.
+ */
+void addrharvest_set_recurse( AddressHarvester* harvester, const gboolean value )
+{
+	g_return_if_fail( harvester != NULL );
+
+	/* Stored on the harvester object as its folderRecurse flag */
+	harvester->folderRecurse = value;
+}
+
/*
* Search (case insensitive) for header entry with specified name.
* Enter: harvester Harvester.
HeaderEntry *entry;
entry = node->data;
- if( g_strcasecmp( entry->header, name ) == 0 ) {
+ if (g_ascii_strncasecmp(entry->header, name,
+ sizeof(entry->header)) == 0 ) {
retVal = entry;
break;
}
newFolder = TRUE; /* Folder is full */
}
- if( newFolder ) {
- cnt = 1 + ( entry->count / harvester->folderSize );
- folderName = g_strdup_printf( "%s (%d)", entry->header, cnt );
- folder = addritem_create_item_folder();
- addritem_folder_set_name( folder, folderName );
- addritem_folder_set_remarks( folder, "" );
- addrcache_id_folder( cache, folder );
- addrcache_add_folder( cache, folder );
- entry->folder = folder;
- g_free( folderName );
- }
-
/* Insert address */
key = g_strdup( address );
g_strdown( key );
person = g_hash_table_lookup( harvester->dupTable, key );
if( person ) {
- /* Use longest name */
+ /* Update existing person to use longest name */
value = ADDRITEM_NAME(person);
if( strlen( name ) > strlen( value ) ) {
addritem_person_set_common_name( person, name );
g_free( key );
}
else {
+ /* Folder if required */
+ if( newFolder ) {
+ cnt = 1 + ( entry->count / harvester->folderSize );
+ folderName =g_strdup_printf( "%s (%d)",
+ entry->header, cnt );
+ folder = addritem_create_item_folder();
+ addritem_folder_set_name( folder, folderName );
+ addritem_folder_set_remarks( folder, "" );
+ addrcache_id_folder( cache, folder );
+ addrcache_add_folder( cache, folder );
+ entry->folder = folder;
+ g_free( folderName );
+ }
+
/* Insert entry */
person = addrcache_add_contact(
cache, folder, name, address, "" );
g_hash_table_insert( harvester->dupTable, key, person );
entry->count++;
}
+ addritem_parse_first_last( person );
}
/*
* Remove specified string from name.
+ * Every case-insensitive occurrence is cut out of name in place.
* Enter: name Name.
- * em String to remove.
+ * str String to remove. An empty string removes nothing.
*/
-static void addrharvest_del_email( gchar *name, gchar *em ) {
+static void addrharvest_del_email( gchar *name, gchar *str ) {
gchar *p;
- gint ilen;
+	size_t lenr;
- ilen = strlen( em );
- while( p = strcasestr( name, em ) ) {
- memmove( p, p + ilen, ilen + 1 );
+	lenr = strlen( str );
+	/* An empty pattern would match forever without shortening name */
+	if( lenr == 0 ) return;
+	while((p = strcasestr( name, str )) != NULL) {
+		/* Shift only the tail (plus its NUL) over the match, so nothing
+		 * beyond the end of the string is read */
+		memmove( p, p + lenr, strlen( p + lenr ) + 1 );
}
}
gchar buffer[ ADDR_BUFFSIZE + 2 ];
const gchar *bp;
const gchar *ep;
- gchar *atCh, *email, *p;
+ gchar *atCh, *email, *name;
gint bufLen;
/* Search for an address */
- while( atCh = addrharvest_find_at( hdrBuf ) ) {
+ while((atCh = addrharvest_find_at( hdrBuf )) != NULL) {
/* Find addres string */
addrharvest_find_address( hdrBuf, atCh, &bp, &ep );
buffer[ bufLen + 1 ] = '\0';
buffer[ bufLen + 2 ] = '\0';
- /* Make whitespace */
- p = buffer;
- while( *p ) {
- if( *p == '\r' || *p == '\n' || *p == '\t' ) *p = ' ';
- p++;
- }
-
/* Extract address from buffer */
email = addrharvest_extract_address( buffer );
if( email ) {
mgu_str_ltc2space( buffer, '(', ')' );
g_strstrip( buffer );
+ if( g_ascii_strcasecmp( buffer, email ) == 0 ) {
+ name = "";
+ }
+ else {
+ name = buffer;
+ conv_unmime_header_overwrite(name);
+ }
+
/* Insert into address book */
addrharvest_insert_cache(
- harvester, entry, cache, buffer, email );
+ harvester, entry, cache, name, email );
g_free( email );
}
hdrBuf = ep;
}
}
+/*
+ * Test whether buffer contains a header that appears in header list.
+ * The header name is the text of buf before the first ':'. It is compared
+ * against each list entry in full, ignoring ASCII case.
+ * Enter: listHdr Header list; the data of each node is read as a header
+ *                name string.
+ *        buf     Header buffer.
+ * Return: TRUE if header in list. FALSE if buf has no ':' or nothing matches.
+ */
+static gboolean addrharvest_check_hdr( GList *listHdr, gchar *buf ) {
+	gboolean retVal;
+	GList *node;
+	gchar *p, *hdr, *nhdr;
+
+	p = strchr( buf, ':' );
+	if( p == NULL ) return FALSE;
+
+	retVal = FALSE;
+	hdr = g_strndup( buf, ( gsize ) ( p - buf ) );
+	for( node = listHdr; node; node = g_list_next( node ) ) {
+		nhdr = node->data;
+		/* Compare whole names. Bounding the comparison by sizeof(nhdr)
+		 * would only look at pointer-size bytes, so names sharing a
+		 * short prefix would be treated as equal. */
+		if( g_ascii_strcasecmp( nhdr, hdr ) == 0 ) {
+			retVal = TRUE;
+			break;
+		}
+	}
+	g_free( hdr );
+	return retVal;
+}
+
+/*
+ * Read one header (first line plus any continuation lines) into a linked
+ * list of lines.
+ * Lines are collected only when the header name on the first line appears
+ * in listHdr; any other header is read and discarded, giving an empty list.
+ * A continuation line is one starting with a space or tab; that leading
+ * character is consumed and not stored. Each read takes at most
+ * MSG_BUFFSIZE - 1 characters, so a longer physical line is split.
+ * Enter: fp      File to read.
+ *        listHdr List of header lines of interest.
+ *        done    Set to TRUE when a blank line (end of headers) or end of
+ *                file is found where a header should start. It is never set
+ *                together with a non-empty list: a header that ends exactly
+ *                at end of file is returned first, and the following call
+ *                reports done.
+ * Return: Linked list of lines (newly allocated strings), or NULL.
+ */
+static GSList *addrharvest_get_header( FILE *fp, GList *listHdr, gboolean *done ) {
+	GSList *list;
+	gchar buf[ MSG_BUFFSIZE + 2 ];
+	gint ch;
+	gboolean foundHdr;
+
+	list = NULL;
+
+	/* Read line */
+	if( fgets( buf, MSG_BUFFSIZE, fp ) == NULL ) {
+		*done = TRUE;
+		return list;
+	}
+
+	/* Test for end of headers */
+	if( buf[0] == '\r' || buf[0] == '\n' ) {
+		*done = TRUE;
+		return list;
+	}
+
+	/* Test whether required header */
+	foundHdr = addrharvest_check_hdr( listHdr, buf );
+
+	/* Read all header lines. Only add reqd ones to list */
+	while( TRUE ) {
+		if( foundHdr ) {
+			list = g_slist_append( list, g_strdup( buf ) );
+		}
+
+		/* Peek at first character of the next line */
+		ch = fgetc( fp );
+		if( ch != ' ' && ch != '\t' ) {
+			/*
+			 * Not a continuation. Push a real character back for
+			 * the next header. At end of file do NOT flag done
+			 * here: the caller stops as soon as done is set, so
+			 * the lines gathered above would be dropped and
+			 * leaked. The next call's fgets() reports it instead.
+			 */
+			if( ch != EOF ) {
+				ungetc( ch, fp );
+			}
+			break;
+		}
+
+		/* Continuation character - read rest of line into buffer */
+		if( fgets( buf, MSG_BUFFSIZE, fp ) == NULL ) {
+			break;
+		}
+	}
+
+	return list;
+}
+
/*
* Read specified file into address book.
* Enter: harvester Harvester object.
*/
static gint addrharvest_readfile(
AddressHarvester *harvester, const gchar *fileName,
- AddressCache *cache )
+ AddressCache *cache, GList *listHdr )
{
gint retVal;
FILE *msgFile;
- gchar buf[ MSG_BUFFSIZE ], tmp[ MSG_BUFFSIZE ];
+ gchar *buf, *addr, *p;
HeaderEntry *entry;
+ GSList *list;
+ gboolean done;
msgFile = fopen( fileName, "rb" );
if( ! msgFile ) {
return retVal;
}
- for( ;; ) {
- gint val;
- gchar *p;
+ done = FALSE;
+ while( TRUE ) {
+ list = addrharvest_get_header( msgFile, listHdr, &done );
+ if( done ) break;
- val = procheader_get_one_field(
- buf, sizeof(buf), msgFile, NULL );
- if( val == -1 ) {
- break;
+ if( list == NULL ) {
+ continue;
}
- conv_unmime_header( tmp, sizeof(tmp), buf, NULL );
- if(( p = strchr( tmp, ':' ) ) != NULL ) {
- const gchar *hdr;
+ buf = mgu_list_coalesce( list );
+ mgu_free_list( list );
+
+ if(( p = strchr( buf, ':' ) ) != NULL ) {
+ addr = p + 1;
*p = '\0';
- hdr = p + 1;
- entry = addrharvest_find( harvester, tmp );
+
+ entry = addrharvest_find( harvester, buf );
if( entry && entry->selected ) {
+ /* Sanitize control characters */
+ p = addr;
+ while( *p ) {
+ if( *p == '\r' || *p == '\n' || *p == '\t' )
+ *p = ' ';
+ p++;
+ }
addrharvest_parse_address(
- harvester, entry, cache, hdr );
+ harvester, entry, cache, addr );
}
}
+ g_free( buf );
}
fclose( msgFile );
return MGU_SUCCESS;
}
+/*
+ * Test whether a directory name is one of the tab separated names listed
+ * in DIR_IGNORE. The whole name must match; a substring does not count.
+ * Enter: name Directory name.
+ * Return: TRUE if directory should be ignored.
+ */
+static gboolean addrharvest_ignore_dir( const gchar *name ) {
+	const gchar *tok, *end;
+	size_t lenName, lenTok;
+
+	lenName = strlen( name );
+	tok = DIR_IGNORE;
+	while( *tok ) {
+		end = strchr( tok, '\t' );
+		lenTok = end ? ( size_t ) ( end - tok ) : strlen( tok );
+		if( lenTok == lenName && strncmp( tok, name, lenName ) == 0 ) {
+			return TRUE;
+		}
+		if( end == NULL ) break;
+		tok = end + 1;
+	}
+	return FALSE;
+}
+
+/*
+ * Read all files in specified directory into address book. Directories are
+ * traversed recursively if the harvester's folderRecurse flag is set.
+ * Only regular files whose name is accepted by to_number() are read.
+ * The working directory is changed to dir while it is processed and is
+ * changed back to what it was on entry before returning.
+ * Enter: harvester Harvester object.
+ *        cache     Address cache to load.
+ *        listHdr   List of header names of interest.
+ *        dir       Directory to process.
+ */
+static void addrharvest_harvest_dir(
+	AddressHarvester *harvester, AddressCache *cache, GList *listHdr,
+	gchar *dir )
+{
+	DIR *dp;
+	struct dirent *d;
+	struct stat s;
+	gchar *cwd;
+
+	if( ( dp = opendir( dir ) ) == NULL ) {
+		return;
+	}
+
+	/*
+	 * Entries are accessed by relative name, so we must be inside dir.
+	 * Remember where we came from: ".." is not a reliable way back when
+	 * dir is reached through a symbolic link.
+	 */
+	cwd = g_get_current_dir();
+	if( chdir( dir ) < 0 ) {
+		g_free( cwd );
+		closedir( dp );
+		return;
+	}
+
+	/* Process directory */
+	while( ( d = readdir( dp ) ) != NULL ) {
+		/* Skip entries that cannot be examined; s would be undefined */
+		if( stat( d->d_name, &s ) < 0 ) {
+			continue;
+		}
+		if( S_ISDIR( s.st_mode ) ) {
+			if( harvester->folderRecurse &&
+			    ! addrharvest_ignore_dir( d->d_name ) ) {
+				addrharvest_harvest_dir(
+					harvester, cache, listHdr, d->d_name );
+			}
+		}
+		else if( S_ISREG( s.st_mode ) ) {
+			if( to_number( d->d_name ) >= 0 ) {
+				addrharvest_readfile(
+					harvester, d->d_name, cache, listHdr );
+			}
+		}
+	}
+	closedir( dp );
+
+	if( chdir( cwd ) < 0 ) {
+		g_warning( "addrharvest: cannot return to directory '%s'", cwd );
+	}
+	g_free( cwd );
+}
+
+/*
+ * Read a list of numbered message files into address book. Each message
+ * number is formatted as a decimal file name and read from the harvester's
+ * path. Nothing is read if that path cannot be opened or entered.
+ * The working directory is left at the harvester's path.
+ * Enter: harvester Harvester object.
+ *        cache     Address cache to load.
+ *        listHdr   List of header names of interest.
+ *        msgList   List of message numbers (stored in the node data
+ *                  pointers).
+ */
+static void addrharvest_harvest_list(
+	AddressHarvester *harvester, AddressCache *cache, GList *listHdr,
+	GList *msgList )
+{
+	DIR *dp;
+	guint num;
+	GList *node;
+	gchar msgNum[ MSGNUM_BUFFSIZE ];
+
+	if( ( dp = opendir( harvester->path ) ) == NULL ) {
+		return;
+	}
+
+	/* File names are relative: do not read anything from the wrong place */
+	if( chdir( harvester->path ) < 0 ) {
+		closedir( dp );
+		return;
+	}
+
+	/* Process message list */
+	for( node = msgList; node; node = g_list_next( node ) ) {
+		num = GPOINTER_TO_UINT( node->data );
+		g_snprintf( msgNum, sizeof( msgNum ), "%u", num );
+		addrharvest_readfile( harvester, msgNum, cache, listHdr );
+	}
+	closedir( dp );
+}
+
/*
* ============================================================================
* Read all files in specified directory into address book.
* Enter: harvester Harvester object.
* cache Address cache to load.
+ * msgList List of message numbers, or NULL to process folder.
* Return: Status.
* ============================================================================
*/
-gint addrharvest_harvest( AddressHarvester *harvester, AddressCache *cache ) {
+gint addrharvest_harvest(
+ AddressHarvester *harvester, AddressCache *cache, GList *msgList )
+{
gint retVal;
- DIR *dp;
- struct dirent *d;
- struct stat s;
- gint num;
+ GList *node;
+ GList *listHdr;
retVal = MGU_BAD_ARGS;
g_return_val_if_fail( harvester != NULL, retVal );
addrcache_clear( cache );
cache->dataRead = FALSE;
- if( chdir( harvester->path ) < 0 ) {
- /* printf( "Error changing dir\n" ); */
- return retVal;
- }
+ /* Build list of headers of interest */
+ listHdr = NULL;
+ node = harvester->headerTable;
+ while( node ) {
+ HeaderEntry *entry;
- if( ( dp = opendir( harvester->path ) ) == NULL ) {
- /* printf( "Error opening dir\n" ); */
- return retVal;
- }
+ entry = node->data;
+ if( entry->selected ) {
+ gchar *p;
- while( ( d = readdir( dp ) ) != NULL ) {
- stat( d->d_name, &s );
- if( S_ISREG( s.st_mode ) ) {
- if( ( num = to_number( d->d_name ) ) >= 0 ) {
- addrharvest_readfile(
- harvester, d->d_name, cache );
- }
+ p = g_strdup( entry->header );
+ g_strdown( p );
+ listHdr = g_list_append( listHdr, p );
}
+ node = g_list_next( node );
}
- closedir( dp );
+ /* Process directory/files */
+ if( msgList == NULL ) {
+ addrharvest_harvest_dir( harvester, cache, listHdr, harvester->path );
+ }
+ else {
+ addrharvest_harvest_list( harvester, cache, listHdr, msgList );
+ }
+ mgu_free_dlist( listHdr );
/* Mark cache */
cache->modified = FALSE;