#!/usr/bin/perl -w

use strict;

use LWP::UserAgent;
use HTTP::Cookies;
use HTML::Entities;
use HTML::Form;
use Term::Readline;
use Getopt::Long;
use Pod::Usage;
use File::Basename;
use Data::DumpXML qw(dump_xml);
use Data::DumpXML::Parser;
use HTML::LinkExtor;
use URI::URL;
use Compress::Zlib;


# globals
my $program_id 		= basename($0);
my $version 		= '0.9';

# command line arguments:
my $RECORD		='';	# recording a new script
my $PLAYBACK	='';	# playback an existing script
my $EXTEND		='';	# extend an existing script
							# note that --record & --playback & --extend are mutually exclusive
my $VERSION		='';    
my $HELP			='';  # print usage info
my $MAN			='';  # not yet implemented - need more info in POD format

# with arguments
my $FILE;     

# Process command line options
GetOptions(	'record'		=> \$RECORD
			,'playback'		=> \$PLAYBACK
			,'extend'		=> \$EXTEND
			,'version'		=> \$VERSION
			,'help|?'		=> \$HELP
			,'man'			=> \$MAN
			,'file=s'		=> \$FILE
			) or pod2usage(2);  # prints SYNOPSIS & OPTIONS/ARGUMENTS sections

if ($VERSION) {
	print "\nThis is $program_id v$version\n";
	exit;
}
#pod2usage(1) if (($HELP) or (@ARGV==0) or ($RECORD and $PLAYBACK) or ($RECORD and $EXTEND) or ($EXTEND and $PLAYBACK) ); # prints SYNOPSIS only
pod2usage(1) if (($HELP) or (!$FILE) or ($RECORD and $PLAYBACK) or ($RECORD and $EXTEND) or ($EXTEND and $PLAYBACK) ); # prints SYNOPSIS only
pod2usage(-exitstatus => 0, -verbose => 2) if $MAN; # prints SYNOPSIS & OPTIONS/ARGUMENTS sections

my $term = new Term::ReadLine 'Perl Web Scripter';
my $OUT = $term->OUT || <STDOUT>;

my ($ua, @urls, @post_data, @requests, $sub_request, $last_response);

$ua = LWP::UserAgent->new(keep_alive => 10);
$ua->cookie_jar( HTTP::Cookies->new );
$ua->agent('Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0)');

# base is used to "absolutize" any relative URI's in the requests list
#my $base = 'https://cust.insure.statefarm.com';

my $req_nbr = 0;

if ($PLAYBACK) {
	die "Playback function not yet implemented\n";		
	my $blesser = sub {
		my ( $ref, $class ) = @_;
		bless $ref, $class;
	};

	my $p = Data::DumpXML::Parser->new(Blesser => $blesser);
	#my $p = Data::DumpXML::Parser->new;
	
	my $requests_ref = $p->parsefile($FILE);
	
	@requests = @$requests_ref;
	
	foreach my $req (@requests) {
		$sub_request = 1;
		execute_request($req);
	}
} elsif ($EXTEND) {
	die "Extend function not yet implemented\n";
} else {

	# record a new script
	open SCRIPT,">  $FILE"  or die "can't open output file $FILE: $!";
	
	# Bookmarks should really come from an external file, with functions to add/modify/remove them
	# for now, they are hardcoded here. 
	# Also, should provide ability to type in a url, not using a bookmark
	my @bookmarks;

	push @bookmarks, 'https://your.host.com:your-port/your/path?your=parms';
	push @bookmarks, 'http://your.host.com:your-port/your/path?your=parms';	
	
	
	my $index;
	print "\nBookmarks:\n";
	for ($index = 0; $index < @bookmarks; $index++) {
			print "(",$index + 1,")\t", $bookmarks[$index], "\n";
	}
	print "\n";
	
	my $selected_bookmark_nbr = 0;

	while ($selected_bookmark_nbr ne 'q' && $selected_bookmark_nbr ne 'o' && ($selected_bookmark_nbr < 1 or $selected_bookmark_nbr > $index)) {
		$selected_bookmark_nbr = &promptUser("Select bookmark number, \"o\" for other url, or \"q\" to quit", "1");
	}
	if ($selected_bookmark_nbr eq 'q') {
		exit;
	}
	my $url;
	if ($selected_bookmark_nbr eq 'o') {
		# process user-entered URL
		$url = &promptUser("Enter URL or \"q\" to quit", "q");
		if ($url eq 'q') {
			exit;
		}
	} else {
		# process selected bookmark
		$url = $bookmarks[--$selected_bookmark_nbr];
	}
	
	my	$req = HTTP::Request->new('GET', URI->new($url));

	my $quit_requested = 0;

	while ($req && !$quit_requested) {

		$requests[$req_nbr++] = $req;
		
		$sub_request = 1;

		my $response = execute_request($req);
		$req = undef;

		#print "\nDone retrieving -- looking for forms\n";
		#print "Last response content = \n", $last_response->content;

		my @forms = HTML::Form->parse($last_response->content, $last_response->base());
		
		my @links = findLinks($last_response);
		
		if (@forms == 0 && @links == 0) {
			print "\nno forms or links found - html content logged to noclicks.htm\n";
			open LOG,">  noclicks.htm"  or die "can't open log file noclicks.htm: $!";
			print LOG $response->content;
			close LOG;
			$quit_requested = 1;			
		} else {
			$req = processUserInput(\@forms, \@links);			
		}
	}
	my $requests_xml = dump_xml(@requests);
	print SCRIPT $requests_xml;
	close SCRIPT;
}

exit;

sub findLinks {

	my ($response) = @_;

	#print "\nlooking for links\n";

	# Set up a callback that collects links
	my @links = ();
	sub callback {
		my($tag, %attr) = @_;
		return if $tag ne 'a';  # we only look closer at <a ...>
		push(@links, values %attr);
	}

	# Make the parser. 
	my $p = HTML::LinkExtor->new(\&callback, $response->base);

	# parse response
	if (!$p->parse($response->content)) {
		print "\nLinkExtor parse error\n";
	}
	return @links;
}

sub printForm {
	
	my ($editable_ref, $clickable_ref, $selected_form_nbr) = @_;
	
	my @editable_inputs = @$editable_ref;
	my @clickable_inputs = @$clickable_ref;

	print "\nWorking with form #",$selected_form_nbr + 1,":\n";

	my $choices = 0;
	if (@editable_inputs > 0) {
		print "\tFound the following editable inputs:\n";
		foreach my $input (@editable_inputs) {
			print "\t\t",++$choices,"\t",$input->name,"\t",$input->type,"\t",$input->value ? $input->value : '',"\n";
		}
	} else {
		print "\tFound no editable inputs\n";
	}
	if (@clickable_inputs > 0) {
		print "\tFound the following clickable inputs:\n";
		foreach my $input (@clickable_inputs) {
			print "\t\t",++$choices,"\t",$input->name,"\t",$input->type,"\n";
		}
	} else {
		print "\tFound no clickable inputs\n";
	}
	print "\n";
}


sub processUserInput {

	my ($forms_ref, $links_ref) = @_;
	my @forms = @$forms_ref;
	my @links = @$links_ref;
	
	my $req = undef;
	
	# sort links (case insensitive)
	@links = sort {uc($a) cmp uc($b)} @links;
	
	# get rid of duplicate links (case insensitive)
	my @temp_links;
	my %links_seen;
	foreach (@links) {
	    push(@temp_links, $_) unless ($links_seen{uc($_)}++);
	}
	@links = @temp_links;
	
	my $input_nbr = 0;
	if (@forms > 0) {
		print "\nForms:\n";	
		for (my $index = 0; $index < @forms; $index++) {
			print "(",++$input_nbr,")\n";
			$forms[$index]->dump;
			print "\n";
		}
	}
	if (@links > 0) {
		print "\nHyperLinks:\n";
		for (my $index = 0; $index < @links; $index++) {
				print "(",++$input_nbr,")\t", $links[$index], "\n";
		}
		print "\n";
	}
	
	my $selected_input_nbr = 0;
	my $selected_form_nbr = 0;
	if ($input_nbr > 1) {
		while ($selected_input_nbr ne 'q' && ($selected_input_nbr < 1 or $selected_input_nbr > $input_nbr)) {
			$selected_input_nbr = &promptUser("Select form or link number or \"q\" to quit", "1");
		}
		if ($selected_input_nbr eq 'q') {
			return $req;
		}
		$selected_input_nbr--;
		if ($selected_input_nbr < @forms) {
			$selected_form_nbr = $selected_input_nbr;
		} else {
			$selected_input_nbr -= @forms;
			my	$URI = URI->new($links[$selected_input_nbr]);
			$req = HTTP::Request->new('GET', $URI);
			return $req;
		}
	}

	while (!$req) {	

		my @inputs = $forms[$selected_form_nbr]->inputs;

		my (@editable_inputs, @clickable_inputs);

		foreach my $input (@inputs) {
			if ($input->type ne "hidden") {
				if ($input->type ne "image" && $input->type ne "submit") {
					push @editable_inputs, $input;	
				} else {
					push @clickable_inputs, $input;	
				}
			}
		}

		printForm(\@editable_inputs, \@clickable_inputs, $selected_form_nbr);

		if (@clickable_inputs == 1 && @editable_inputs == 0) {
			my $submit ='';
			while ($submit ne 'y' && $submit ne 'n' && $submit ne 'q') {
				$submit = &promptUser("Submit form (y/n) or \"q\" to quit", "y");
			}
			if ($submit eq 'q') {
				#$quit_requested = 1;
				#last;
				return undef;
			} elsif ($submit eq 'y') {
				#print "\tSubmitting form. . . \n";
				$req = $forms[$selected_form_nbr]->click($clickable_inputs[0]->name);
				return $req;
			}
		} else {
			my $all_inputs_done = 0;
			while (!$req) {			
				my $action = 0;
				while ( $action ne 'a' and $action ne 'q' and ($action < 1 or $action >= (@editable_inputs + @clickable_inputs + 1)) ) {
					$action = &promptUser("Enter input number to work with \"a\" for all or \"q\" to quit", $all_inputs_done ? (@editable_inputs + 1) : 'a') ;
				}
				if ($action eq 'q') {
					#$quit_requested = 1;
					#last;
					return undef;
				} elsif ($action eq 'a') {
					foreach my $input (@editable_inputs) {
						if (!getInputValue($input)) {
							#$quit_requested = 1;
							#last;
							return undef;
						}
						printForm(\@editable_inputs, \@clickable_inputs, $selected_form_nbr);
					}
					$all_inputs_done = 1;					
				} elsif ($action > @editable_inputs) {
					$action = $action - @editable_inputs - 1;
					#print "you chose to click on clickable input # $action\n";
					$req = $forms[$selected_form_nbr]->click($clickable_inputs[$action]->name);			
					return $req;
				} else {
					$action--;
					#print "you chose to edit editable input # ",$action + 1,"\n";
					if (!getInputValue($editable_inputs[$action])) {
						#$quit_requested = 1;
						#last;
						return undef;
					}
				}
			}
		}
	}
	return undef;
}

sub getInputValue  {
	
	my ($input) = @_;
	
	if (my @possible_values = $input->possible_values) {
		my $choices = 0;
		print "\n";
		foreach my $value (@possible_values) {
			print "\t\t",++$choices,"\t",$value,"\n";
		}
		my $selected_value = 0;
		while ($selected_value ne 'n' and $selected_value ne 'q' and ($selected_value < 1 or $selected_value > @possible_values)) {
	      my $prompt = 	"\n\nEnter choice nbr for \"".$input->name."\", \"n\" for none, \"q\" to quit";
			$selected_value = &promptUser($prompt, "n");	
		}
		if ($selected_value eq 'q') {		
			return 0;
		} elsif ($selected_value ne 'n') {
			$selected_value--;
			#print "setting value to ",$possible_values[$selected_value],"\n";
			$input->value($possible_values[$selected_value]);
		}
	} else {
		print "\n";
		my $selected_value = &promptUser($input->name, $input->value);	
		#print "setting value to ",$selected_value,"\n";					
		$input->value($selected_value);					
	}
	return 1;
}

#----------------------------(  promptUser  )-----------------------------#
#                                                                         #
#  FUNCTION:	promptUser                                                #
#                                                                         #
#  PURPOSE:	Prompt the user for some type of input, and return the    #
#		input back to the calling program.                        #
#                                                                         #
#  ARGS:	$promptString - what you want to prompt the user with     #
#		$defaultValue - (optional) a default value for the prompt #
#                                                                         #
#-------------------------------------------------------------------------#

sub promptUser {

   #-------------------------------------------------------------------#
   #  two possible input arguments - $promptString, and $defaultValue  #
   #  make the input arguments local variables.                        #
   #-------------------------------------------------------------------#

   my($promptString, $defaultValue) = @_;

   if ($defaultValue) {
      $_ = $term->readline("$promptString [$defaultValue]: " )
   } else {
   	$_ = $term->readline("$promptString: " )
   }

   #------------------------------------------------------------------#
   # remove the newline character from the end of the input the user  #
   # gave us.                                                         #
   #------------------------------------------------------------------#

   chomp;
   
   #------------------------------------------------------------------#
   # store this input to the readline library's command history       #
   #                                                                  #
   #------------------------------------------------------------------#

   $term->addhistory($_) if /\S/;

   #-----------------------------------------------------------------#
   #  if we had a $default value, and the user gave us input, then   #
   #  return the input; if we had a default, and they gave us no     #
   #  no input, return the $defaultValue.                            #
   #                                                                 # 
   #  if we did not have a default value, then just return whatever  #
   #  the user gave us.  if they just hit the  key,                  #
   #  the calling routine will have to deal with that.               #
   #-----------------------------------------------------------------#

   if ($defaultValue) {
      return $_ ? $_ : $defaultValue;    # return $_ if it has a value
   } else {
      return $_;
   }
}

sub get_url {

   my($base,$url, $post_data) = @_;
  
   my $URI;
	
	if ($base) {
		$URI = URI->new_abs($url,$base),
	} else {
		$URI = URI->new($url),
	}
		
	my $req;
	
	if ($post_data) {
		$req = HTTP::Request->new('POST', $URI);
		$req->header('Content-Type' => 'application/x-www-form-urlencoded');
		$req->content($post_data);
	} else {
		$req = HTTP::Request->new('GET', $URI);
	}
	
	my $response = execute_request($req);
	return $response;
}

sub execute_request {

	my($req) = @_;
	
	$req->header('Accept-encoding' => 'gzip, deflate');
	
	print "\nReq #$req_nbr.$sub_request:\n",$req->as_string;

	my $res = $ua->request($req);
	
	$last_response = $res;
	if ($res->is_success) {
		print "\nResp Headers #$req_nbr.$sub_request:\n";
		print $res->protocol, " ", $res->status_line, "\n";
		print $res->headers_as_string;
		my $follow_url = undef;	 
		if ( $res->header('Content-encoding')) {
			my $encoding = $res->header('Content-encoding');
			if ($encoding =~ /deflate/o ) {
				# process deflate encoding
				print "\ndeflate encoding found\n";
				$res->content(inflateBody($res->content_ref));
			} elsif ($encoding =~ /gzip/o ) {
				# process gzip encoding
				print "\ngzip encoding found\n";
				die "gzip decompression not yet implemented\n";
			} else {
				die "unsuported content-encoding $encoding returned by browser\n";
			}
		}
		if ( $res->header('Refresh') && $res->header('Refresh') =~ /url=(.*)/o ) {

			print "\nFollowing re-direct via Refresh found in HTTP headers or HTML <head>\n";
			$follow_url = $1;

		} elsif ($res->content && $res->content =~ /document\.location\.href = \"(.+)\"/o ) {

			print "\nFollowing re-direct via script document.location.href = \n";
			$follow_url = $1;

		} elsif ($res->content && $res->content =~ /<meta\s+http-equiv="?Refresh"?\s+content\s*=\s*"\d*;\s*[Uu][Rr][Ll]=(.*?)"/o) { 

			print "\nFollowing re-direct via http-equiv=Refresh found in HTML <body> or compressed <head>\n";
			$follow_url = $1;
		}
		if ($follow_url) {
			decode_entities($follow_url);
			my $base = $req->uri->scheme."://".$req->uri->authority;
			#print "\nbase=$base\n";
			$sub_request++;

			get_url(URI->new_abs($follow_url,$base));	
		}
	} else {
		 print "\nFailed: ", $res->status_line;
	}
	return $res;	
}

sub inflateBody {

		my ($body_ref) = @_;
		my $body = $$body_ref;
		
		my $result = undef;
		
		my $deflated_length = length($body);
		my $i = inflateInit( -WindowBits => -15 ) or die "Cannot create an inflation stream\n" ;
		($result, my $status) = $i->inflate($body);
		if ($status == Z_OK or $status == Z_STREAM_END) {
			my $inflated_length = length($result);
			my $compression_ratio = int((1 - $deflated_length/$inflated_length) * 100);
			print "\nContent-length after inflate = $inflated_length  savings = $compression_ratio%\n\n";
			#decode_entities($result); # get rid of HTML entities ("&amp;" etc.)
			#if (length($result) <= 1024) { # only print the small ones (redirects)
			#	print "\nBody =\n$result\n";
			#} 
		} else {
			print "\nInflation failed: status = $status\n";
			print "\nBody length after failed inflate = ",length($result),"\n";
			print "\n1st 40 body char codes after failed inflate =\n";
			for (my $j=0;$j<39;$j++) {
				print ord(substr($body,$j,1)), " ";
			}
			print "\n";
			if (defined($result)) { print "\nInflated body = $result\n";}
		}
		return $result;
}

__END__

=head1 NAME

web_script -	Record, playback, or extend scripted HTTP(S) sessions in a text-based browser format.

=head1 SYNOPSIS

web_script [--record | --playback | --extend] --file file

Options:

 --help              print this help message
 
 --man               full documentation (not implemented)
 
 --version           print program version information
 
 --record            record a new script to a file

 --playback          re-execute a script from a file
 
 --extend            run a script to it's end, and continue 
                     recording at that point, appending to 
                     the script file
                           
 --file <filename>	specify script file to work with
 									
 - Only one of --record, --playback, or --extend may be specified.
 								
 - All options may be abbreviated to uniqueness; i.e. -h for --help.
 
 