Sigh. I should probably cook up a test program for HtVector and HtHeap
again, but I think it's time to grab some sleep.

The following are some patches that start to convert the indexing to sort
based on hopcount. Right now it works in the single server case, but I
need to make the list of servers into a heap so it works in the multiple
server case.

The problem seems to be that HtHeap/HtVector isn't freeing memory
correctly.

I get output like this:

..
32:34:2:http://www.htdig.org/hts_templates.html: *******-* size = 8494
33:35:2:http://www.htdig.org/mailing.html: *-** size = 2729
34:67:2:http::0H@@ww.htdig.org/hts_templates.html: Unknown host: 
 host not found
35:68:2:http::0h@[EMAIL PROTECTED]/mailing.html: Unknown host: 
 host not found

Since I fixed one bug in HtHeap already, I'm assuming these "phantom" URLs
are simply URLRefs that have not been properly freed. If someone can spot
the problem, I'd be very grateful. (Offhand, I'd guess it's in HtVector.)

-Geoff

Index: htdig/Server.cc
===================================================================
RCS file: /opt/htdig/cvs/htdig3/htdig/Server.cc,v
retrieving revision 1.9
diff -c -3 -p -r1.9 Server.cc
*** htdig/Server.cc     1999/07/03 20:56:39     1.9
--- htdig/Server.cc     1999/08/11 03:39:13
*************** void Server::push(char *path, int hopcou
*** 235,244 ****
      ref->SetURL(path);
      ref->SetHopCount(hopcount);
      ref->SetReferer(referer);
!     _paths.push(ref);
      _documents++;
  
! //    cout << "***** pushing '" << path << "' with '" << referer << "'\n";
  }
  
  
--- 235,244 ----
      ref->SetURL(path);
      ref->SetHopCount(hopcount);
      ref->SetReferer(referer);
!     _paths.Add(ref);
      _documents++;
  
!     //    cout << "***** pushing '" << path << "' with '" << referer << "'\n";
  }
  
  
*************** void Server::push(char *path, int hopcou
*** 247,253 ****
  //
  URLRef *Server::pop()
  {
!     URLRef    *ref = (URLRef *) _paths.pop();
      if (!ref)
        return 0;
  
--- 247,254 ----
  //
  URLRef *Server::pop()
  {
!     URLRef    *ref = (URLRef *) _paths.Remove();
!     
      if (!ref)
        return 0;
  
Index: htdig/Server.h
===================================================================
RCS file: /opt/htdig/cvs/htdig3/htdig/Server.h,v
retrieving revision 1.4
diff -c -3 -p -r1.4 Server.h
*** htdig/Server.h      1999/07/03 20:56:39     1.4
--- htdig/Server.h      1999/08/11 03:39:13
***************
*** 22,27 ****
--- 22,28 ----
  #include "Queue.h"
  #include "StringMatch.h"
  #include "URLRef.h"
+ #include "HtHeap.h"
  
  class Document;
  
*************** private:
*** 75,81 ****
        int                     _bad_server;            // TRUE if we shouldn't use this one
        int                     _connection_space;      // Seconds between connections
        time_t                  _last_connection;       // Time of last connection to this server
!       Queue                   _paths;
        StringMatch             _disallow;      // This pattern will be used to test paths
        int                     _documents;     // Number of documents visited
        int                     _max_documents;  // Maximum number of documents from this server
--- 76,82 ----
        int                     _bad_server;            // TRUE if we shouldn't use this one
        int                     _connection_space;      // Seconds between connections
        time_t                  _last_connection;       // Time of last connection to this server
!       HtHeap                  _paths;
        StringMatch             _disallow;      // This pattern will be used to test paths
        int                     _documents;     // Number of documents visited
        int                     _max_documents;  // Maximum number of documents from this server
Index: htdig/URLRef.cc
===================================================================
RCS file: /opt/htdig/cvs/htdig3/htdig/URLRef.cc,v
retrieving revision 1.2
diff -c -3 -p -r1.2 URLRef.cc
*** htdig/URLRef.cc     1999/07/03 20:56:39     1.2
--- htdig/URLRef.cc     1999/08/11 03:39:13
*************** static char RCSid[] = "$Id: URLRef.cc,v 
*** 22,28 ****
  //
  URLRef::URLRef()
  {
!       hopcount = 0;
  }
  
  
--- 22,28 ----
  //
  URLRef::URLRef()
  {
!   hopcount = 0;
  }
  
  
*************** URLRef::URLRef()
*** 32,36 ****
--- 32,48 ----
  URLRef::~URLRef()
  {
  }
+ 
+ 
+ //*****************************************************************************
+ // int URLRef::compare(Object *to)
+ //
+ int URLRef::compare(Object *to)
+ {
+   URLRef      *u = (URLRef *) to;
+ 
+   return hopcount - u->hopcount;
+ }
+ 
  
  
Index: htdig/URLRef.h
===================================================================
RCS file: /opt/htdig/cvs/htdig3/htdig/URLRef.h,v
retrieving revision 1.3
diff -c -3 -p -r1.3 URLRef.h
*** htdig/URLRef.h      1999/07/03 20:56:39     1.3
--- htdig/URLRef.h      1999/08/11 03:39:13
*************** public:
*** 35,40 ****
--- 35,42 ----
        void            SetURL(URL u)                   {url = u;}
        void            SetHopCount(int h)              {hopcount = h;}
        void            SetReferer(URL ref)             {referer = ref;}
+ 
+       int             compare(Object *to);
        
  private:
        URL             url;
Index: htlib/HtHeap.cc
===================================================================
RCS file: /opt/htdig/cvs/htdig3/htlib/HtHeap.cc,v
retrieving revision 1.2
diff -c -3 -p -r1.2 HtHeap.cc
*** htlib/HtHeap.cc     1999/03/14 03:26:21     1.2
--- htlib/HtHeap.cc     1999/08/11 03:39:13
*************** Object *HtHeap::Remove()
*** 92,98 ****
  {
    Object *min = Peek();
  
!   data->RemoveFrom(data->Count() - 1);
    
    if (data->Count() > 1)
      pushDownRoot(0);
--- 92,98 ----
  {
    Object *min = Peek();
  
!   data->RemoveFrom(0);
    
    if (data->Count() > 1)
      pushDownRoot(0);


------------------------------------
To unsubscribe from the htdig3-dev mailing list, send a message to
[EMAIL PROTECTED] containing the single word "unsubscribe" in
the SUBJECT of the message.

Reply via email to