Hello!
Geoff Hutchison wrote:
> The - argument will be integrated into the 3.2.x tree. I haven't
> personally had a chance to do it, but if someone else doesn't, I'll do it
> myself.
Thank you. And here is the simple HtFile class (attachment 1) I mentioned in
one of my letters. I'm not insisting that it be integrated; I'm sending it
just in case it is useful. This class tries to read the whole file in
one operation, so it needs a small additional function in the String
class (attachment 2). It would probably be better to read line by line; then
it would be possible to open FIFOs as well as regular files. Attachment 3
contains a small change to the URL class (there is no need to strip
index.html from file:// URLs). Finally, attachment 4 contains changes
to the Document class so that it creates an HtFile object for file:// URLs.
One more thing: I think that CanBeParsed, isParsable and similar functions
do not belong in the HtHTTP class. AFAIR they report whether a document
is parsable using only its content_type, so they should probably live in a
standalone utility class.
Alexis
diff -uN htdig-3-2-x.cvs.2/htnet/HtFile.cc htdig3-2-x/htnet/HtFile.cc
--- htdig-3-2-x.cvs.2/htnet/HtFile.cc Thu Jan 1 03:00:00 1970
+++ htdig3-2-x/htnet/HtFile.cc Thu Dec 16 22:13:31 1999
@@ -0,0 +1,125 @@
+//
+// HtFile.cc
+//
+// HtFile: Interface classes for retrieving local documents
+//
+// Including:
+// - Generic class
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Public License version 2 or later
+// <http://www.gnu.org/copyleft/gpl.html>
+//
+// $Id$
+//
+
+#include "lib.h"
+#include "Transport.h"
+#include "HtFile.h"
+
+#include <signal.h>
+#include <sys/types.h>
+#include <ctype.h>
+#include <iostream.h>
+#include <stdio.h> // for sscanf
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fstream.h>
+
+
+///////
+ // HtFile_Response class
+///////
+
+
+// Construction: the constructor body is empty, nothing is set up here
+
+HtFile_Response::HtFile_Response()
+{
+}
+
+
+// Destruction: the destructor body is empty, no cleanup is done here
+
+HtFile_Response::~HtFile_Response()
+{
+}
+
+///////
+ // HtFile generic class
+ //
+ //
+///////
+
+
+// Construction: start with no modification time recorded
+// (Request() skips its "not changed" test while this pointer is NULL)
+HtFile::HtFile()
+{
+ _modification_time=NULL;
+}
+
+// Destruction
+
+HtFile::~HtFile()
+{
+ // Empty: no cleanup is performed in this destructor body
+}
+
+
+///////
+ // Loads the local file _url.path() into _response. Returns Document_ok, or
+ // Document_not_found, Document_not_changed or Document_not_local if rejected.
+///////
+HtFile::DocStatus HtFile::Request()
+{
+    // Reset the response
+    _response.Reset();
+    // Check that it exists, and is a regular file.
+    // Should we allow FIFO's?
+    struct stat stat_buf;
+    if (stat(_url.path(), &stat_buf) != 0 || !S_ISREG(stat_buf.st_mode))
+        return Document_not_found;
+    if (_modification_time && *_modification_time >= HtDateTime(stat_buf.st_mtime))
+        return Document_not_changed;
+
+    // Only the extension decides the content type; anything else is refused
+    char *ext = strrchr(_url.path(), '.');
+    if (ext == NULL)
+        return Document_not_local;
+    if ((mystrcasecmp(ext, ".html") == 0) || (mystrcasecmp(ext, ".htm") == 0))
+        _response._content_type = "text/html";
+    else if (mystrcasecmp(ext, ".txt") == 0)
+        _response._content_type = "text/plain";
+    else
+        return Transport::Document_not_local;
+
+    _response._modification_time = new HtDateTime(stat_buf.st_mtime);
+    ifstream in(_url.path());
+    if (!in)
+        return Document_not_found;
+
+    // Take the size from gcount(): after a short read the stream is in a
+    // failed state and tellg() returns -1, which is not a usable length.
+    _response._contents.allocate(stat_buf.st_size + 1);
+    in.read(_response._contents.get(), stat_buf.st_size);
+    const int bytes_read = in.gcount();
+    _response._contents.setLength(bytes_read);
+    _response._content_length = bytes_read;
+    _response._document_length = bytes_read;
+    _response._status_code = 0;
+    if (debug > 2)
+        cout << "Read a total of " << _response._document_length << " bytes\n";
+    return Document_ok;
+}
+
+HtFile::DocStatus HtFile::GetDocumentStatus()
+{
+    // A status code of -1 is reported as "not found"; every other value
+    // is reported as success.
+    const bool not_found = (_response._status_code == -1);
+    return not_found ? Document_not_found : Document_ok;
+}
+
diff -uN htdig-3-2-x.cvs.2/htnet/HtFile.h htdig3-2-x/htnet/HtFile.h
--- htdig-3-2-x.cvs.2/htnet/HtFile.h Thu Jan 1 03:00:00 1970
+++ htdig3-2-x/htnet/HtFile.h Thu Dec 16 22:13:21 1999
@@ -0,0 +1,120 @@
+//
+// HtFile.h
+//
+// HtFile: Class for local files (derived from Transport)
+//
+// Alexis Mikhailov, from HtHTTP.h by Gabriele Bartolini - Prato - Italia
+// started: 03.05.1999
+//
+// ////////////////////////////////////////////////////////////
+//
+// The HtFile class should provide an interface for retrieving local
+// documents. It derives from Transport class.
+//
+///////
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Public License version 2 or later
+// <http://www.gnu.org/copyleft/gpl.html>
+//
+// $Id$
+//
+
+#ifndef _HTFILE_H
+#define _HTFILE_H
+
+#include "Transport.h"
+#include "URL.h"
+#include "htString.h"
+
+
+// In advance declarations
+
+class HtFile;
+
+class HtFile_Response : public Transport_Response
+{
+ // Response type used by HtFile; declares only a constructor and destructor
+ friend class HtFile; // lets HtFile write the response fields directly
+
+ public:
+///////
+ // Construction / Destruction
+///////
+
+ HtFile_Response();
+ ~HtFile_Response();
+};
+
+class HtFile : public Transport
+{
+public:
+ // Transport-derived class for retrieving local files
+///////
+ // Construction/Destruction
+///////
+
+ HtFile();
+ ~HtFile();
+
+ // Performs the retrieval of the current request URL
+
+ // manages a Transport request (method inherited from Transport class)
+ virtual DocStatus Request ();
+
+ ///////
+ // Interface for resource retrieval
+ ///////
+
+ // Set and get the document to be retrieved (the URL is stored by copy)
+ void SetRequestURL(URL &u) { _url = u;}
+ URL GetRequestURL () { return _url;}
+
+
+ // Set and get the referring URL (the URL is stored by copy)
+ void SetRefererURL (URL u) { _referer = u;}
+ URL GetRefererURL () { return _referer;}
+
+
+ ///////
+ // Interface for the response
+ ///////
+
+ // Returns the response only if its status code is not -1, otherwise NULL
+ // (-1 is presumably the initialization value -- confirm in Transport_Response)
+
+ Transport_Response *GetResponse()
+ {
+ if (_response._status_code != -1)
+ return &_response;
+ else return NULL;}
+
+
+ // Get the document status
+ virtual DocStatus GetDocumentStatus();
+
+protected:
+
+///////
+ // Member attributes
+///////
+
+ ///////
+ // Single request information (Member attributes)
+ ///////
+
+ URL _url; // URL to retrieve
+ URL _referer; // Referring URL
+
+ ///////
+ // Response information
+ ///////
+
+ HtFile_Response _response; // Object where response
+ // information will be stored into
+};
+
+#endif
+
diff -ru htdig-3-2-x.cvs.2/htlib/String.cc htdig3-2-x/htlib/String.cc
--- htdig-3-2-x.cvs.2/htlib/String.cc Thu Dec 16 00:04:26 1999
+++ htdig3-2-x/htlib/String.cc Sat Dec 11 22:42:03 1999
@@ -631,6 +632,12 @@
" Data: " << ((void*) Data) << " '" << *this << "'\n";
}
+void String::setLength(int length) // record `length` as the string's length
+{
+ if (Allocated <= length) // request more space when `length` does not fit below Allocated
+ allocate(length);
+ Length = length; // set directly; this function itself copies no characters
+}
int String::readLine(FILE *in)
{
diff -ru htdig-3-2-x.cvs.2/htlib/htString.h htdig3-2-x/htlib/htString.h
--- htdig-3-2-x.cvs.2/htlib/htString.h Thu Dec 16 00:04:26 1999
+++ htdig3-2-x/htlib/htString.h Fri Dec 10 00:16:14 1999
@@ -57,6 +57,7 @@
// allocation size, use the following member to set the size.
//
void allocate(int init) {reallocate_space(init);}
+ void setLength(int length);
//
// allocate space for a new char *, and copy the String in.
diff -ru htdig-3-2-x.cvs.2/htcommon/URL.cc htdig3-2-x/htcommon/URL.cc
--- htdig-3-2-x.cvs.2/htcommon/URL.cc Tue Dec 7 22:54:10 1999
+++ htdig3-2-x/htcommon/URL.cc Wed Dec 8 22:04:36 1999
@@ -440,7 +440,8 @@
_path.lowercase();
// And don't forget to remove index.html or similar file.
- removeIndex(_path);
+ if (strcmp((char*)_service, "file") != 0)
+ removeIndex(_path);
}
//*****************************************************************************
diff -ru htdig-3-2-x.cvs.2/htdig/Document.cc htdig3-2-x/htdig/Document.cc
--- htdig-3-2-x.cvs.2/htdig/Document.cc Mon Dec 6 22:53:30 1999
+++ htdig3-2-x/htdig/Document.cc Wed Dec 8 22:55:32 1999
@@ -57,6 +57,7 @@
contents = 0;
transportConnect = 0;
HTTPConnect = 0;
+ FileConnect = 0;
externalConnect = 0;
if (max_size > 0)
@@ -106,6 +107,8 @@
// We delete only the derived class objects
if (HTTPConnect)
delete HTTPConnect;
+ if (FileConnect)
+ delete FileConnect;
#if MEM_DEBUG
char *p = new char;
@@ -288,6 +291,35 @@
HTTPConnect->SetProxy(useproxy);
transportConnect = HTTPConnect;
}
+ else if (mystrncasecmp(url->service(), "file", 4) == 0)
+ {
+ if (!FileConnect)
+ {
+ if (debug>4)
+ cout << "Creating an HtFile object" << endl;
+
+ FileConnect = new HtFile();
+
+ if (!FileConnect)
+ return Transport::Document_other_error;
+ }
+
+ if (FileConnect)
+ {
+ // Here we must set only thing for a file request
+
+ FileConnect->SetRequestURL(*url);
+
+ // Set the referer
+ if (referer)
+ FileConnect->SetRefererURL(*referer);
+
+ if (debug > 2)
+ cout << "Making 'file' request on " << url->get() << endl;
+ }
+
+ transportConnect = FileConnect;
+ }
else
{
if (debug)
diff -ru htdig-3-2-x.cvs.2/htdig/Document.h htdig3-2-x/htdig/Document.h
--- htdig-3-2-x.cvs.2/htdig/Document.h Sun Nov 28 05:45:10 1999
+++ htdig3-2-x/htdig/Document.h Tue Dec 7 22:36:49 1999
@@ -28,6 +28,7 @@
#include "htString.h"
#include "Transport.h"
#include "HtHTTP.h"
+#include "HtFile.h"
#include "ExternalTransport.h"
class Connection;
@@ -102,6 +103,7 @@
Transport *transportConnect;
HtHTTP *HTTPConnect;
+ HtFile *FileConnect;
ExternalTransport *externalConnect;
------------------------------------
To unsubscribe from the htdig3-dev mailing list, send a message to
[EMAIL PROTECTED]
You will receive a message to confirm this.