2004-12-01 [paul] 0.9.12cvs177.2
[claws.git] / src / addrharvest.c
index 96583627d27df4fa67196144fa78632565bed435..c71ef48f33e59e4f094532a19e9fe5b28e4f842e 100644 (file)
@@ -19,7 +19,6 @@
 
 /*
  * Functions for an E-Mail address harvester.
- * Code still needs some work. Address parsing not strictly correct.
  */
 
 #include <sys/stat.h>
@@ -41,13 +40,17 @@ static gchar *_headerCC_       = HEADER_CC;
 static gchar *_headerTo_       = HEADER_TO;
 
 #define ADDR_BUFFSIZE    1024
-#define MSG_BUFFSIZE     16384
+#define MSG_BUFFSIZE     2048
+#define MSGNUM_BUFFSIZE  32
 #define DFL_FOLDER_SIZE  20
 
 /* Noise strings included by some other E-Mail clients */
 #define REM_NAME_STRING  "(E-mail)"
 #define REM_NAME_STRING2 "(E-mail 2)"
 
+/* Tab-separated names of directory entries skipped when recursing */
+#define DIR_IGNORE ".\t..\t.sylpheed_mark\t.sylpheed_cache"
+
 /*
  * Header entry.
  */
@@ -64,7 +67,7 @@ struct _HeaderEntry {
  * Enter: harvester Harvester object.
  *        name      Header name.
  */
-static void *addrharvest_build_entry(
+static void addrharvest_build_entry(
                AddressHarvester* harvester, gchar *name )
 {
        HeaderEntry *entry;
@@ -77,12 +80,6 @@ static void *addrharvest_build_entry(
        harvester->headerTable = g_list_append( harvester->headerTable, entry );
 }
 
-static void addrharvest_print_hdrentry( HeaderEntry *entry, FILE *stream ) {
-       fprintf( stream, "Header Entry\n" );
-       fprintf( stream, "    name : %s\n", entry->header );
-       fprintf( stream, "selected : %s\n", entry->selected ? "yes" : "no" );
-}
-
 /*
  * Free key in table.
  */
@@ -175,6 +172,18 @@ void addrharvest_set_folder_size(
        }
 }
 
+/*
+ * Switch sub-folder processing on or off for a harvester.
+ * Enter: harvester Harvester object.
+ *        value     TRUE to process sub-folders, FALSE to process folder only.
+ */
+void addrharvest_set_recurse( AddressHarvester *harvester,
+                              const gboolean value )
+{
+       g_return_if_fail( harvester != NULL );
+       harvester->folderRecurse = value;
+}
+
 /*
  * Search (case insensitive) for header entry with specified name.
  * Enter: harvester Harvester.
@@ -192,7 +201,8 @@ static HeaderEntry *addrharvest_find(
                HeaderEntry *entry;
 
                entry = node->data;
-               if( g_strcasecmp( entry->header, name ) == 0 ) {
+               if (g_ascii_strncasecmp(entry->header, name,
+                                       sizeof(entry->header)) == 0 ) {
                        retVal = entry;
                        break;
                }
@@ -314,24 +324,12 @@ static void addrharvest_insert_cache(
                newFolder = TRUE;       /* Folder is full */
        }
 
-       if( newFolder ) {
-               cnt = 1 + ( entry->count / harvester->folderSize );
-               folderName = g_strdup_printf( "%s (%d)", entry->header, cnt );
-               folder = addritem_create_item_folder();
-               addritem_folder_set_name( folder, folderName );
-               addritem_folder_set_remarks( folder, "" );
-               addrcache_id_folder( cache, folder );
-               addrcache_add_folder( cache, folder );
-               entry->folder = folder;
-               g_free( folderName );
-       }
-
        /* Insert address */
        key = g_strdup( address );
        g_strdown( key );
        person = g_hash_table_lookup( harvester->dupTable, key );
        if( person ) {
-               /* Use longest name */
+               /* Update existing person to use longest name */
                value = ADDRITEM_NAME(person);
                if( strlen( name ) > strlen( value ) ) {
                        addritem_person_set_common_name( person, name );
@@ -339,26 +337,42 @@ static void addrharvest_insert_cache(
                g_free( key );
        }
        else {
+               /* Folder if required */
+               if( newFolder ) {
+                       cnt = 1 + ( entry->count / harvester->folderSize );
+                       folderName =g_strdup_printf( "%s (%d)",
+                                       entry->header, cnt );
+                       folder = addritem_create_item_folder();
+                       addritem_folder_set_name( folder, folderName );
+                       addritem_folder_set_remarks( folder, "" );
+                       addrcache_id_folder( cache, folder );
+                       addrcache_add_folder( cache, folder );
+                       entry->folder = folder;
+                       g_free( folderName );
+               }
+
                /* Insert entry */
                person = addrcache_add_contact(
                                cache, folder, name, address, "" );
                g_hash_table_insert( harvester->dupTable, key, person );
                entry->count++;
        }
+       addritem_parse_first_last( person );
 }
 
 /*
  * Remove specified string from name.
  * Enter: name Name.
- *        em   String to remove.
+ *        str  String to remove.
  */
-static void addrharvest_del_email( gchar *name, gchar *em ) {
+static void addrharvest_del_email( gchar *name, gchar *str ) {
        gchar *p;
-       gint ilen;
+       gint lenn, lenr;
 
-       ilen = strlen( em );
-       while( p = strcasestr( name, em )  ) {
-               memmove( p, p + ilen, ilen + 1 );
+       lenr = strlen( str );
+       while((p = strcasestr( name, str )) != NULL) {
+               lenn = strlen( p );
+               memmove( p, p + lenr, lenn );
        }
 }
 
@@ -502,11 +516,11 @@ static void addrharvest_parse_address(
        gchar buffer[ ADDR_BUFFSIZE + 2 ];
        const gchar *bp;
        const gchar *ep;
-       gchar *atCh, *email, *p;
+       gchar *atCh, *email, *name;
        gint bufLen;
 
        /* Search for an address */
-       while( atCh = addrharvest_find_at( hdrBuf ) ) {
+       while((atCh = addrharvest_find_at( hdrBuf )) != NULL) {
                /* Find addres string */
                addrharvest_find_address( hdrBuf, atCh, &bp, &ep );
 
@@ -520,13 +534,6 @@ static void addrharvest_parse_address(
                buffer[ bufLen + 1 ] = '\0';
                buffer[ bufLen + 2 ] = '\0';
 
-               /* Make whitespace */
-               p = buffer;
-               while( *p ) {
-                       if( *p == '\r' || *p == '\n' || *p == '\t' ) *p = ' ';
-                       p++;
-               }
-
                /* Extract address from buffer */
                email = addrharvest_extract_address( buffer );
                if( email ) {
@@ -544,15 +551,116 @@ static void addrharvest_parse_address(
                        mgu_str_ltc2space( buffer, '(', ')' );
                        g_strstrip( buffer );
 
+                       if( g_ascii_strcasecmp( buffer, email ) == 0 ) {
+                               name = "";
+                       }
+                       else {
+                               name = buffer;
+                               conv_unmime_header_overwrite(name);
+                       }
+
                        /* Insert into address book */
                        addrharvest_insert_cache(
-                               harvester, entry, cache, buffer, email );
+                               harvester, entry, cache, name, email );
                        g_free( email );
                }
                hdrBuf = ep;
        }
 }
 
+/*
+ * Test whether buffer contains a header that appears in header list. The
+ * header name is the text before the first ':'; it must equal a list entry
+ * in full, ignoring ASCII case.
+ * Enter: listHdr Header list (header name strings).
+ *        buf     Header buffer.
+ * Return: TRUE if header in list.
+ */
+static gboolean addrharvest_check_hdr( GList *listHdr, gchar *buf ) {
+       gboolean retVal = FALSE;
+       GList *node;
+       gchar *p, *hdr;
+
+       /* No colon means this line cannot start a header field */
+       p = strchr( buf, ':' );
+       if( p == NULL ) return retVal;
+
+       /* Isolate the header name */
+       hdr = g_strndup( buf, ( gsize ) ( p - buf ) );
+       for( node = listHdr; node; node = g_list_next( node ) ) {
+               /* Compare whole names; a length of sizeof( pointer ) would
+                * only look at the first few characters */
+               if( g_ascii_strcasecmp( node->data, hdr ) == 0 ) {
+                       retVal = TRUE;
+                       break;
+               }
+       }
+       g_free( hdr );
+       return retVal;
+}
+
+/*
+ * Read one header field from file as a linked list of lines: the first
+ * line plus any continuation lines (those starting with space or tab).
+ * Lines are kept only if addrharvest_check_hdr() accepts the first line;
+ * otherwise the field is read and skipped, and the list stays empty.
+ * fgets() splits a line longer than MSG_BUFFSIZE - 1 characters; what is
+ * left over is then handled like a following line.
+ * Enter:  fp      File to read.
+ *         listHdr List of header lines of interest.
+ *         done    Set TRUE at end of headers or end of file; never set
+ *                 to FALSE here.
+ * Return: Linked list of lines.
+ */
+static GSList *addrharvest_get_header( FILE *fp, GList *listHdr, gboolean *done ) {
+       gchar line[ MSG_BUFFSIZE + 2 ];
+       GSList *lines = NULL;
+       gboolean wanted;
+       gint next;
+
+       /* Nothing could be read (end of file or error) */
+       if( fgets( line, MSG_BUFFSIZE, fp ) == NULL ) {
+               *done = TRUE;
+               return lines;
+       }
+
+       /* A leading CR or LF marks the line that ends the headers */
+       if( line[0] == '\r' || line[0] == '\n' ) {
+               *done = TRUE;
+               return lines;
+       }
+
+       /* Decide once, from the first line, whether to keep this field */
+       wanted = addrharvest_check_hdr( listHdr, line );
+
+       for( ;; ) {
+               if( wanted ) {
+                       lines = g_slist_append( lines, g_strdup( line ) );
+               }
+
+               /* Look at the first character of the following line */
+               next = fgetc( fp );
+               if( next != ' ' && next != '\t' ) {
+                       if( next == EOF ) {
+                               *done = TRUE;
+                       }
+                       else {
+                               /* Start of next header: leave it unread */
+                               ungetc( next, fp );
+                       }
+                       break;
+               }
+
+               /* Continuation line: the leading blank has been consumed,
+                * the rest of the line replaces the buffer contents */
+               if( fgets( line, MSG_BUFFSIZE, fp ) == NULL ) {
+                       break;
+               }
+       }
+
+       return lines;
+}
+
 /*
  * Read specified file into address book.
  * Enter:  harvester Harvester object.
@@ -562,12 +670,14 @@ static void addrharvest_parse_address(
  */
 static gint addrharvest_readfile(
                AddressHarvester *harvester, const gchar *fileName,
-               AddressCache *cache )
+               AddressCache *cache, GList *listHdr )
 {
        gint retVal;
        FILE *msgFile;
-       gchar buf[ MSG_BUFFSIZE ], tmp[ MSG_BUFFSIZE ];
+       gchar *buf, *addr, *p;
        HeaderEntry *entry;
+       GSList *list;
+       gboolean done;
 
        msgFile = fopen( fileName, "rb" );
        if( ! msgFile ) {
@@ -576,47 +686,132 @@ static gint addrharvest_readfile(
                return retVal;
        }
 
-       for( ;; ) {
-               gint val;
-               gchar *p;
+       done = FALSE;
+       while( TRUE ) {
+               list = addrharvest_get_header( msgFile, listHdr, &done );
+               if( done ) break;
 
-               val = procheader_get_one_field(
-                       buf, sizeof(buf), msgFile, NULL );
-               if( val == -1 ) {
-                       break;
+               if( list == NULL ) {
+                       continue;
                }
-               conv_unmime_header( tmp, sizeof(tmp), buf, NULL );
-               if(( p = strchr( tmp, ':' ) ) != NULL ) {
-                       const gchar *hdr;
 
+               buf = mgu_list_coalesce( list );
+               mgu_free_list( list );
+
+               if(( p = strchr( buf, ':' ) ) != NULL ) {
+                       addr = p + 1;
                        *p = '\0';
-                       hdr = p + 1;
-                       entry = addrharvest_find( harvester, tmp );
+
+                       entry = addrharvest_find( harvester, buf );
                        if( entry && entry->selected ) {
+                               /* Sanitize control characters */
+                               p = addr;
+                               while( *p ) {
+                                       if( *p == '\r' || *p == '\n' || *p == '\t' )
+                                               *p = ' ';
+                                       p++;
+                               }
                                addrharvest_parse_address(
-                                       harvester, entry, cache, hdr );
+                                       harvester, entry, cache, addr );
                        }
                }
+               g_free( buf );
        }
 
        fclose( msgFile );
        return MGU_SUCCESS;
 }
 
+/*
+ * Read all files in specified directory into address book. Directories are
+ * traversed recursively if necessary.
+ * Enter:  harvester Harvester object.
+ *         cache     Address cache to load.
+ *         listHdr   List of header names of interest.
+ *         dir       Directory to process.
+ */
+static void addrharvest_harvest_dir(
+       AddressHarvester *harvester, AddressCache *cache, GList *listHdr,
+       gchar *dir )
+{
+       DIR *dp;
+       struct dirent *d;
+       struct stat s;
+       gchar *key;
+
+       if( ( dp = opendir( dir ) ) == NULL ) return;
+       /* Entries are accessed by relative name, so dir must be current */
+       if( chdir( dir ) < 0 ) {
+               closedir( dp );
+               return;
+       }
+       while( ( d = readdir( dp ) ) != NULL ) {
+               /* Skip entries that cannot be examined; s would be stale */
+               if( stat( d->d_name, &s ) < 0 ) continue;
+               if( S_ISDIR( s.st_mode ) && harvester->folderRecurse ) {
+                       /* Match whole tab-delimited names, not substrings */
+                       key = g_strconcat( "\t", d->d_name, "\t", NULL );
+                       if( strstr( "\t" DIR_IGNORE "\t", key ) == NULL ) {
+                               addrharvest_harvest_dir(
+                                       harvester, cache, listHdr, d->d_name );
+                       }
+                       g_free( key );
+               }
+               else if( S_ISREG( s.st_mode ) && to_number( d->d_name ) >= 0 ) {
+                       addrharvest_readfile(
+                               harvester, d->d_name, cache, listHdr );
+               }
+       }
+       chdir( ".." );
+       closedir( dp );
+}
+
+/*
+ * Read list of files in specified directory into address book.
+ * Enter:  harvester Harvester object; files are read from its path.
+ *         cache     Address cache to load.
+ *         listHdr   List of header names of interest.
+ *         msgList   List of message numbers.
+ */
+static void addrharvest_harvest_list(
+       AddressHarvester *harvester, AddressCache *cache, GList *listHdr,
+       GList *msgList )
+{
+       DIR *dp;
+       GList *node;
+       gchar msgNum[ MSGNUM_BUFFSIZE ];
+
+       if( ( dp = opendir( harvester->path ) ) == NULL ) {
+               return;
+       }
+
+       /* Files are opened by number relative to the folder, so do not
+        * read anything if the folder cannot be made current */
+       if( chdir( harvester->path ) == 0 ) {
+               for( node = msgList; node; node = g_list_next( node ) ) {
+                       g_snprintf( msgNum, sizeof( msgNum ), "%u",
+                                   GPOINTER_TO_UINT( node->data ) );
+                       addrharvest_readfile( harvester, msgNum, cache, listHdr );
+               }
+       }
+       closedir( dp );
+}
+
 /*
  * ============================================================================
  * Read all files in specified directory into address book.
  * Enter:  harvester Harvester object.
  *         cache     Address cache to load.
+ *         msgList   List of message numbers, or NULL to process folder.
  * Return: Status.
  * ============================================================================
  */
-gint addrharvest_harvest( AddressHarvester *harvester, AddressCache *cache ) {
+gint addrharvest_harvest(
+       AddressHarvester *harvester, AddressCache *cache, GList *msgList )
+{
        gint retVal;
-       DIR *dp;
-       struct dirent *d;
-       struct stat s;
-       gint num;
+       GList *node;
+       GList *listHdr;
 
        retVal = MGU_BAD_ARGS;
        g_return_val_if_fail( harvester != NULL, retVal );
@@ -627,27 +822,31 @@ gint addrharvest_harvest( AddressHarvester *harvester, AddressCache *cache ) {
        addrcache_clear( cache );
        cache->dataRead = FALSE;
 
-       if( chdir( harvester->path ) < 0 ) {
-               /* printf( "Error changing dir\n" ); */
-               return retVal;
-       }
+       /* Build list of headers of interest */
+       listHdr = NULL;
+       node = harvester->headerTable;
+       while( node ) {
+               HeaderEntry *entry;
 
-       if( ( dp = opendir( harvester->path ) ) == NULL ) {
-               /* printf( "Error opening dir\n" ); */
-               return retVal;
-       }
+               entry = node->data;
+               if( entry->selected ) {
+                       gchar *p;
 
-       while( ( d = readdir( dp ) ) != NULL ) {
-               stat( d->d_name, &s );
-               if( S_ISREG( s.st_mode ) ) {
-                       if( ( num = to_number( d->d_name ) ) >= 0 ) {
-                               addrharvest_readfile(
-                                       harvester, d->d_name, cache );
-                       }
+                       p = g_strdup( entry->header );
+                       g_strdown( p );
+                       listHdr = g_list_append( listHdr, p );
                }
+               node = g_list_next( node );
        }
 
-       closedir( dp );
+       /* Process directory/files */
+       if( msgList == NULL ) {
+               addrharvest_harvest_dir( harvester, cache, listHdr, harvester->path );
+       }
+       else {
+               addrharvest_harvest_list( harvester, cache, listHdr, msgList );
+       }
+       mgu_free_dlist( listHdr );
 
        /* Mark cache */
        cache->modified = FALSE;