Hi everyone,

I am trying to use the 'url' method of monit 4.5.1, and have come across
what seems to be a bug.  Though the docs claim that monit will grab the
first 1MB of a file if the HTTP server does not specify a Content-Length
header, it seems to only grab about the first 970ish bytes.

I first noticed this because my url content regexp test was failing,
though it seemed as if it should succeed.  I tried matching different
strings at different places in the document returned until I settled on
about 1000 characters.

I went poking around the code and found the HTTP_CONTENT_MAX variable,
which seemed to limit the amount of data scarfed if there was no
Content-Length header:
[excerpt from protocols/http.c]

  if(H->content_length < 0) /* Not defined in response */
    H->content_length= HTTP_CONTENT_MAX;
  else if(H->content_length > HTTP_CONTENT_MAX)
    H->content_length= HTTP_CONTENT_MAX;

I put in some debugging lines around this to make sure it was actually
being run as I expected, and H->content_length is being set correctly.

I also put in a debugging line to print out what is contained in the
buffer allocated for the page, against which the regexp is tested.

I found that though reducing HTTP_CONTENT_MAX below 1000ish decreased
the amount of the page that was in the buffer, increasing it did not.
there was a cutoff at about 970, over which it seems to ignore
H->content_length.  

I'm not up on my socket foo to go any further, but I wonder if anybody
has any insight into this problem.  

I've attached protocols/http.c, my monitrc file (you're welcome to hit
my server to repeat the experiment), and the debugging output of
'./monit -I -v -c ./monitrc'

-ben

-- 
Ben Hartshorne
email: [EMAIL PROTECTED]
http://ben.hartshorne.net
set daemon 12 # Poll at 2-minute intervals
set httpd port 8094 and use address localhost
        allow localhost
        allow admin:monit
set logfile /home/ben/monit-4.5.1/benslogfile #what does this do?
set alert [EMAIL PROTECTED] # no global address neede

check host SearchNoResults with address www.simplyhired.com
        if failed url
                
http://www.simplyhired.com/index.php?state=searchresult&miles=75&maxResultsPerPage=10&&keywords=plumber&location=94110&submit_btn=search
                and content == "var maxpage  = [1-9]"
                with timeout 30 seconds
                then alert
/*
 * Copyright (C), 2000-2005 by the monit project group.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <config.h>

#ifdef HAVE_STDIO_H
#include <stdio.h>
#endif

#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif

#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif

#ifdef HAVE_REGEX_H
#include <regex.h>
#endif

#ifdef HAVE_SYS_SOCKET_H
#include <sys/socket.h>
#endif

#ifdef HAVE_STRING_H
#include <string.h>
#endif

#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif

#include "md5.h"
#include "sha.h"
#include "base64.h"
#include "protocol.h"
#include "httpstatus.h"


/**
 *  A HTTP test.
 *
 *  We send the following request to the server:
 *  'GET / HTTP/1.1'             ... if request statement isn't defined
 *  'GET /custom/page  HTTP/1.1' ... if request statement is defined
 *  and check the server's status code.
 *
 *  If the statement defines hostname, it's used in the 'Host:' header
 * otherwise a default (empty) Host header is set.
 *
 *  If the status code is >= 400, an error has occurred.
 *  Return TRUE if the status code is OK, otherwise FALSE.
 *
 *  The function tries to follow redirects from the server and will
 *  try to authenticate using Basic Auth if required by the server.
 *
 *  @author Jan-Henrik Haukeland, <[EMAIL PROTECTED]>
 *  @author Martin Pala, <[EMAIL PROTECTED]>
 *  @version \$Id: http.c,v 1.42 2005/03/23 22:57:34 hauk Exp $
 *  @file
 */



/* ------------------------------------------------------------- Definitions */


#undef   READ_SIZE
#define  READ_SIZE  8192
#define  LINE_SIZE  512

typedef struct {
  Socket_T s;
  int closed;
  int status;
  long content_length;
  char cookie[LINE_SIZE];
  char location[LINE_SIZE];
} Http_T;


/* -------------------------------------------------------------- Prototypes */


static int do_redirect(Http_T *H);
static int do_regex(Http_T *h, Request_T R);
static int check_request(Socket_T s, Port_T P);
static int get_response(Socket_T s, Http_T *H);
static char *get_auth_header(Port_T P, char *auth, int l);
static int check_request_checksum(Http_T *H, char *checksum, int hashtype);


/* ------------------------------------------------------------------ Public */


int check_http(Socket_T s) {

  Port_T P;
  char host[STRLEN];
  char auth[STRLEN]= {0};
  const char *request= NULL;
  
  ASSERT(s);

  P= socket_get_Port(s);

  ASSERT(P);
  
  request= P->request?P->request:"/";
 
  /* The connection is kept open in case we have to follow a
     redirect */
  if(socket_print(s, 
		  "GET %s HTTP/1.1\r\n"
		  "Host: %s\r\n"
		  "Accept: */*\r\n"
		  "User-Agent: %s/%s\r\n"
		  "%s\r\n",
		  request, Util_getHTTPHostHeader(s, host, STRLEN), 
		  prog, VERSION, get_auth_header(P, auth, STRLEN)) < 0) {
    log("HTTP: error sending data -- %s\n", STRERROR);
    return FALSE;
  }

  return check_request(s, P);
  
}


/* ----------------------------------------------------------------- Private */


/**
 * Check that the server returns a valid HTTP response
 * @param s A socket
 * @return TRUE if the response is valid otherwise FALSE
 */
static int check_request(Socket_T s, Port_T P) {

  Http_T H;

  memset(&H, 0, sizeof(H));

  /* Read the response and assign those headers we are interested in */
  if(! get_response(s, &H)) {
    return FALSE;
  }

  switch(H.status) {
    
  case SC_OK: 
    break;
	  
  case SC_MOVED_TEMPORARILY:
    if(!do_redirect(&H)) {
      DEBUG("HTTP warning: cannot follow redirect\n");
      /* The server did respond so this is not strictly an error, but
	 it's probably an error to continue from here */
      return TRUE;
    }
    break;
	  
  default:
    if(H.status>=400) {
      log("HTTP error: Server returned status %d\n", H.status);
      return FALSE;
    }
    
  }
  
  if(P->url_request && P->url_request->regex) {
    if(! do_regex(&H, P->url_request)) {
      log("HTTP error: Failed regular expression test on content"
	  " returned from server\n");
      return FALSE;
    }
  }

  if(P->request_checksum) {
    return check_request_checksum(&H, P->request_checksum, P->request_hashtype);
  }
  
  return TRUE;
  
}


static int check_request_checksum(Http_T *H, char *checksum, int hashtype) {

  Socket_T s= H->s;
  long content_length= H->content_length;

  if(! content_length) {
    DEBUG("HTTP warning: Response does not contain Content-Length\n");
  } else {
    
    char *r;
    int n, i;
    long size;
    char result[STRLEN];
    char buf[READ_SIZE];
    unsigned char hash[STRLEN];
    int  keylength=0;

    switch (hashtype) {
    case HASH_MD5:
      {
	struct md5_ctx ctx;
	md5_init_ctx(&ctx);
	while(content_length > 0) {
	  size= content_length>READ_SIZE?READ_SIZE:content_length;
	  n= socket_read(s, buf, size);
	  if(n<0) break;
	  md5_process_bytes(buf, n, &ctx);
	  content_length -= n; 
	}
	md5_finish_ctx(&ctx, hash);
	keylength=16; /* Raw key bytes not string chars! */
	break;
      }
    case HASH_SHA1: 
      {
	struct sha_ctx ctx;
	sha_init_ctx(&ctx);
	while(content_length > 0) {
	  size= content_length>READ_SIZE?READ_SIZE:content_length;
	  n= socket_read(s, buf, size);
	  if(n<0) break;
	  sha_process_bytes(buf, n, &ctx);
	  content_length -= n; 
	}
	sha_finish_ctx(&ctx, hash);
	keylength=20; /* Raw key bytes not string chars! */
	break;
      }
    default:
      DEBUG("HTTP warning: Unknown hash type\n");
      return FALSE;
    }
    r= result;
    for(i= 0; i < keylength; ++i) {
      r+= snprintf(r, STRLEN-(r-result) ,"%02x", hash[i]);
    }
    if(strncasecmp(result, checksum, keylength*2) != 0) {
      DEBUG("HTTP warning: Document checksum mismatch\n");
      return FALSE;
    } else {
      DEBUG("HTTP: Succeeded testing document checksum\n");
    }
  }
  
  return TRUE;
  
}


static int do_regex(Http_T *H, Request_T R) {

  int n;
  int rv= TRUE;
  unsigned char *buf= NULL;
#ifdef HAVE_REGEX_H
  int regex_return;
#else
  char *regex_return;
#endif

  if(R->regex == NULL) {
    return TRUE;
  }

  if(H->content_length == 0) {
    log("HTTP error: Cannot test regex -- No content returned from server\n");
    return FALSE;
  }

  DEBUG("Content length was %ld \n", H->content_length);
 
  if(H->content_length < 0) /* Not defined in response */
    H->content_length= HTTP_CONTENT_MAX;
  else if(H->content_length > HTTP_CONTENT_MAX)
    H->content_length= HTTP_CONTENT_MAX;

  DEBUG("Content length is now %ld \n", H->content_length);
    
  buf= xmalloc(H->content_length);

  n= socket_read(H->s, buf, H->content_length);
  if(n<=0) {
    rv= FALSE;
    log("HTTP: error receiving data -- %s\n", STRERROR);
    goto error;
  }
  buf[n]= 0;

  DEBUG("*****\nCaptured %d bytes: %s\n***** end captured output *****\n", n, buf);

#ifdef HAVE_REGEX_H

      regex_return=regexec(R->regex,
			   buf,
			   0,
			   NULL,
			   0);
      switch(R->operator) {

      case OPERATOR_EQUAL:
	if(regex_return!=0) {
	  rv= FALSE;
	} else {
	  DEBUG("HTTP: Regular expression test succeeded\n");
	  
	}
	break;

      case OPERATOR_NOTEQUAL:
	if(regex_return == 0) {
	  rv= FALSE;
	} else {
	  DEBUG("HTTP: Regular expression test succeeded\n");
	  
	}
	break;

      default:
	log("HTTP error: Invalid content operator\n");
      }
	
#else
      /* w/o regex support */

      regex_return= strstr(buf, R->regex);
      switch(R->operator) {

      case OPERATOR_EQUAL:
	if(!regex_return) {
	  rv= FALSE;
	  DEBUG("HTTP: Regular expression does not match\n");
	}
	break;

      case OPERATOR_NOTEQUAL:
	if(regex_return) {
	  rv= FALSE;
	  DEBUG("HTTP: Regular expression match\n");
	}
	break;

      default:
	log("HTTP error: Invalid content operator\n");
      }
      
#endif
  
error:
  FREE(buf);
  return rv;
  
}


/* -------------------------------------------------------- State management */


static int do_redirect(Http_T *H) {

  char host[STRLEN];
  char auth[STRLEN]= {0};
  Port_T P= socket_get_Port(H->s);

  if(!*H->location) {
    DEBUG("HTTP error: Missing Location header in response\n");
    return FALSE;
  }

  if(H->closed) {
    DEBUG("HTTP warning: Aborting test, connection was closed\n");
    return FALSE;
  }

  if(socket_print(H->s, 
		  "GET %s HTTP/1.1\r\n"
		  "Host: %s\r\n"
		  "Accept: */*\r\n"
		  "User-Agent: %s/%s\r\n"
		  "Cookie: %s\r\n"
		  "%s\r\n", 
		  H->location, Util_getHTTPHostHeader(H->s, host,STRLEN), 
		  prog, VERSION, H->cookie, get_auth_header(P, auth, STRLEN))
     < 0) {
    DEBUG("HTTP error: Failed sending data -- %s\n", STRERROR);
    return FALSE;
  }

  if(! get_response(H->s, H)) {
    return FALSE;
  }

  switch(H->status) {

  case SC_OK: 
    break;

  case SC_MOVED_TEMPORARILY:
    if(!do_redirect(H)) {
      return FALSE;
    }
    break;

  default:
    if(H->status>=400) {
      return FALSE;
    }

  }

  return TRUE;

}


static int get_response(Socket_T s, Http_T *H) {

  char buf[LINE_SIZE];

  H->s= s;
  *H->location= 0;
  H->content_length= -1;

  if(! socket_readln(H->s, buf, LINE_SIZE)) {
    log("HTTP: error receiving data -- %s\n", STRERROR);
    return FALSE;
  }

  Util_chomp(buf);

  if(sscanf(buf, "%*s %d", &H->status) !=1) {
    log("HTTP error: cannot parse HTTP status in response: %s\n", buf);
    return FALSE;
  }

  while(NULL != socket_readln(s, buf, LINE_SIZE)) {

    if(Util_startsWith(buf, "\r\n") || Util_startsWith(buf, "\n"))
      break;

    Util_chomp(buf);
   
    if(Util_startsWith(buf, "Content-Length")) {
      if(1 != sscanf(buf, "%*s%*[: ]%ld", &H->content_length)) {
	log("HTTP error: parsing Content-Length response header '%s'\n", buf);
	return FALSE;
      }
      if(H->content_length < 0) {
	log("HTTP error: Illegal Content-Length response header '%s'\n", buf);
	return FALSE;
      }
    }
    
    if(Util_startsWith(buf, "Location")) {
      if(1 != sscanf(buf, "%*s%*[: ]%s", H->location)) {
	log("HTTP error: parsing Location response header '%s'\n", buf);
	return FALSE;
      }
    }
    
    if(Util_startsWith(buf, "Connection")) {
      char *p= strchr(buf, ':');
      if(!p) {
	log("HTTP error: parsing Connection response header '%s'\n", buf);
	return FALSE;
      }
      Util_trim(++p);
      H->closed= Util_startsWith(p, "close");
    }
    
    if(Util_startsWith(buf, "Set-Cookie")) {
      char *s;
      char *p= strchr(buf, ':');
      if(!p) {
	log("HTTP error: parsing Cookie response header '%s'\n", buf);
	return FALSE;
      }
      Util_trim(++p);
      /* Strip away cookie-av elements. */ 
again:
      if((s= strstr(p, "Comment")) ||
	 (s= strstr(p, "Domain"))  || 
	 (s= strstr(p, "Max-Age")) || 
	 (s= strstr(p, "Secure"))  || 
	 (s= strstr(p, "Version")) || 
	 (s= strstr(p, "Path"))) {
	*s= 0;
	goto again;
      }
      snprintf(H->cookie, LINE_SIZE, "%s", Util_trim(p));
    }
    
  }

  return TRUE;

}


static char *get_auth_header(Port_T P, char *auth, int l) {
  
  char *b64;
  char buf[STRLEN];
  char *username= NULL;
  char *password= NULL;
  
  if(P->url_request) {
    URL_T U=  P->url_request->url;
    if(U) {
      username= U->user;
      password= U->password;
    }
  }
      
  if(! (username && password)) {
    return auth;
  } 

  snprintf(buf, STRLEN, "%s:%s", username, password);
  if(! (b64= encode_base64(strlen(buf), (unsigned char *)buf)) ) {
    return auth;
  }

  snprintf(auth, l, "Authorization: Basic %s\r\n", b64);
  FREE(b64);
  
  return auth;

}
monit: Debug: Adding host allow 'localhost' (127.0.0.1).
monit: Debug: Adding credentials for user 'admin'.
Starting monit daemon with http interface at [localhost:8094]
Starting monit HTTP server at [localhost:8094]
monit HTTP server started
'system' load average [2.32][1.84][1.51]
'system' memory usage 44.7% [231076 kB]
'system' cpu usage 0.0%us 0.0%sy 0.0%wa
'SearchNoResults' succeeded connecting to INET[www.simplyhired.com:80]
Content length was -1 
Content length is now 10240 
*****
Captured 970 bytes: a5f3
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd";>
<html id="html" dir="ltr" xml:lang="en" lang="en" 
xmlns="http://www.w3.org/1999/xhtml";>
<head>
<title>plumber jobs - 94110 | simplyhired.com </title>
<link rel="shortcut icon" type="image/x-icon" href="images/favicon.ico" />
<link rel="stylesheet" type="text/css" media="screen" href="styles/common.css" 
/>
<link rel="stylesheet" type="text/css" media="screen" href="styles/static.css" 
/>
<link rel="stylesheet" type="text/css" media="screen" href="styles/results.css" 
/>
<link rel="stylesheet" type="text/css" media="print" href="styles/print.css" />
<script type="text/javascript" language="javascript">
<!--

var maxpage  = 6 ;
var sortby   = "" ;
var state    = "" ;
var keywords = "plumber" ;
var locationjs = "94110" ;
var locnormjs = "";
var resultsperpage = 10 ;
eval("var jo
***** end captured output *****
HTTP: Regular expression test succeeded
'SearchNoResults' succeeded testing protocol [HTTP] at 
INET[www.simplyhired.com:80]
Shutting down monit HTTP server
monit HTTP server stopped
monit daemon with pid [16412] killed
Runtime constants:
 Control file       = ./monitrc
 Log file           = /home/ben/monit-4.5.1/benslogfile
 Pid file           = /home/ben/.monit.pid
 Debug              = True
 Log                = True
 Use syslog         = False
 Is Daemon          = True
 Use process engine = True
 Poll time          = 12 seconds
 Mail server(s)     = localhost
 Mail from          = (not defined)
 Mail subject       = (not defined)
 Mail message       = (not defined)
 Start monit httpd  = True
 httpd bind address = localhost
 httpd portnumber   = 8094
 httpd signature    = True
 Use ssl encryption = False
 httpd auth. style  = Basic Authentication and Host/Net allow list
 Alert mail to      = [EMAIL PROTECTED]
 Alert on           = All events

The service list contains the following entries:

Remote Host Name      = SearchNoResults
 Group                = (not defined)
 Monitoring mode      = active
 Port                 = if failed 
www.simplyhired.com:80/index.php?state=searchresult&miles=75&maxResultsPerPage=10&&keywords=plumber&location=94110&submit_btn=search
 [protocol HTTP] with timeout 30 seconds then alert else if recovered then alert

-------------------------------------------------------------------------------

Attachment: signature.asc
Description: Digital signature

_______________________________________________
monit-dev mailing list
[email protected]
http://lists.nongnu.org/mailman/listinfo/monit-dev

Reply via email to