Here's a patch that allows htdig to reject URLs on a local host
that are links (through the file system) to a URL that has already
been indexed. This is an updated version of the patch I submitted
for version 3.0.8b2, which can be found at:
ftp://sol.ccsf.cc.ca.us/htdig-patches/3.0.8b2/Retriever.cc.0
Gilles Detillieux posted an adaptation of this patch on December 7,
but it has a bug that may result in a core dump. Gilles acknowledged
that his patch was untested, and wrote:
... I removed the "return TRUE" after the visited.Add()
call, which would have caused a memory leak as it didn't
delete the local_filename at that point.
I believe it's an error to deallocate the storage for local_filename
once the pointer has been saved in the "visited" hash. This creates
a dangling pointer, and results in a seg fault later when "visited"
is deleted.
--
Warren Jones
Fluke Corporation
--------------------------------- snip snip ---------------------------------
Index: Retriever.cc
===================================================================
RCS file: /home/wjones/src/CVS.repo/htdig/htdig/Retriever.cc,v
retrieving revision 1.1.1.5
diff -c -r1.1.1.5 Retriever.cc
*** Retriever.cc 1999/12/15 22:06:09 1.1.1.5
--- Retriever.cc 2000/01/11 00:29:30
***************
*** 18,23 ****
--- 18,25 ----
#include <signal.h>
#include <assert.h>
#include <stdio.h>
+ #include <sys/types.h>
+ #include <sys/stat.h>
#include "HtWordType.h"
static WordList words;
***************
*** 603,609 ****
static String url;
url = u;
! return !visited.Exists(url);
}
--- 605,651 ----
static String url;
url = u;
! if ( visited.Exists(url) )
! return FALSE;
!
! String *local_filename = GetLocal(u);
! if ( ! local_filename )
! return TRUE;
!
! //
! // For local URL's, check list for device and inode to make
! // sure we haven't already indexed a link to this file.
! //
! struct stat buf;
!
! if ( stat(local_filename->get(),&buf) != 0 )
! {
! delete local_filename;
! return TRUE;
! }
!
! //
! // Make hash key from device and inode:
! //
! char key[2*sizeof(ino_t)+2*sizeof(dev_t)+2];
! sprintf( key, "%x+%x", buf.st_dev, buf.st_ino );
!
! if ( ! visited.Exists(key) )
! {
! visited.Add(key,local_filename);
! return TRUE;
! }
!
! if ( debug )
! {
! String *dup = (String*)visited.Find(key);
! cout << endl
! << "Duplicate: " << local_filename->get()
! << " -> " << dup->get() << endl;
! }
!
! delete local_filename;
! return FALSE;
}
------------------------------------
To unsubscribe from the htdig3-dev mailing list, send a message to
[EMAIL PROTECTED]
You will receive a message to confirm this.