Here's a patch that allows htdig to reject URLs on a local host
that are links (through the file system) to a URL that has already
been indexed.  This is an updated version of the patch I submitted
for version 3.0.8b2, which can be found at:

     ftp://sol.ccsf.cc.ca.us/htdig-patches/3.0.8b2/Retriever.cc.0

Gilles Detillieux posted an adaptation of this patch on December 7,
but it has a bug that may result in a core dump.  Gilles acknowledged
that his patch was untested, and wrote:

    ... I removed the "return TRUE" after the visited.Add()
    call, which would have caused a memory leak as it didn't
    delete the local_filename at that point.

I believe it's an error to deallocate the storage for local_filename
when the pointer has been saved in the "visited" hash.  This creates
a dangling pointer, and results in a seg fault later when "visited"
is deleted.

-- 
Warren Jones
Fluke Corporation

--------------------------------- snip snip ---------------------------------

Index: Retriever.cc
===================================================================
RCS file: /home/wjones/src/CVS.repo/htdig/htdig/Retriever.cc,v
retrieving revision 1.1.1.5
diff -c -r1.1.1.5 Retriever.cc
*** Retriever.cc        1999/12/15 22:06:09     1.1.1.5
--- Retriever.cc        2000/01/11 00:29:30
***************
*** 18,23 ****
--- 18,25 ----
  #include <signal.h>
  #include <assert.h>
  #include <stdio.h>
+ #include <sys/types.h>
+ #include <sys/stat.h>
  #include "HtWordType.h"
  
  static WordList       words;
***************
*** 603,609 ****
      static String     url;
      url = u;
  
!     return !visited.Exists(url);
  }
  
  
--- 605,651 ----
      static String     url;
      url = u;
  
!     if ( visited.Exists(url) )
!       return FALSE;
! 
!     String *local_filename = GetLocal(u);
!     if ( ! local_filename )
!       return TRUE;
! 
!     //
!     // For local URL's, check list for device and inode to make
!     // sure we haven't already indexed a link to this file.
!     //
!     struct stat buf;
! 
!     if ( stat(local_filename->get(),&buf) != 0 )
!     {
!       delete local_filename;
!       return TRUE;
!     }
! 
!     //
!     // Make hash key from device and inode:
!     //
!     char key[2*sizeof(ino_t)+2*sizeof(dev_t)+2];
!     sprintf( key, "%x+%x", buf.st_dev, buf.st_ino );
! 
!     if ( ! visited.Exists(key) )
!     {
!       visited.Add(key,local_filename);
!       return TRUE;
!     }
! 
!     if ( debug )
!     {
!       String *dup = (String*)visited.Find(key);
!       cout << endl
!       << "Duplicate: " << local_filename->get()
!       << " -> " << dup->get() << endl;
!     }
! 
!     delete local_filename;
!     return FALSE;
  }
  
  

------------------------------------
To unsubscribe from the htdig3-dev mailing list, send a message to
[EMAIL PROTECTED] 
You will receive a message to confirm this. 

Reply via email to