#!/usr/bin/perl

##########################################################################
#
# pl2doc.pl: A perl script to convert Microsoft Word documents to Plucker
#            format, using the wv toolkit and the current Python distiller
#
# Written by: David A. Desrosiers
# Contact at: hacker at gnu dash designs dot com
#
# Copyright 2002.  This may be modified and distributed on the same terms
# as Perl itself. 
#
# wv can be found at the wvWare homepage: http://www.wvware.com/
# Plucker can be found at the Plucker homepage: http://plkr.org/
#
# 09/29/02: version 0.0.1, first cut. Does simple validation, conversion
# 
# TODO: 
#    - Detect Microsoft Word documents of the wrong Content-type
#    - Update to handle PDF (easy conversion of convert_doc)
#    - Clean up the cruft, add more error handling
#
# Usage:
# 
#    perl ./pl2doc.pl http://www.domain.com/path/to/file.doc
#
##########################################################################

use strict;					# Clean code == good code
use Date::Manip;				# date functions
use Digest::MD5 qw(md5 md5_hex md5_base64);	# convert date to md5
use File::Path;					# mkpath/rmtree
use LWP::Simple;				# for getstore()
use LWP::UserAgent;				# HEAD request
use HTTP::Request; 				# Content-Foo

#########################################################
#
# Set this path to something useful, /tmp should be fine
#
#########################################################
my $workpath	= "/tmp";

my $date        = UnixDate("today","%b %e, %Y at %T");
my $md5file     = md5_hex($date);	# names the scratch directory and the output file
my $agent	= "ppdc 0.0.1 (Plucker Perl Doc Convertor)";
my $url		= $ARGV[0];

#########################################################
#
# Without a URL there is nothing to fetch; say so instead
# of sending a HEAD request for an undefined location
#
#########################################################
unless (defined $url && length $url) {
	print STDERR "Usage: perl $0 http://www.domain.com/path/to/file.doc\n";
	exit 1;
}

#########################################################
#
# Defaults based on your device
#
#########################################################
my $bpp		= 4;
my $compression	= "zlib";

#########################################################
#
# Change these to point to the proper location on your
# system for wvHtml and plucker-build
#
#########################################################
my $pb_bin	= "/usr/bin/plucker-build";
my $wv_bin	= "/usr/bin/wvHtml";

#########################################################
#
# Maximum document size you want to fetch, remotely
#
#########################################################
my $max_docsize = 400000;

#########################################################
#
# Don't mess with these values below, things will break
#
# A HEAD request is used so the type and size of the
# document can be inspected before anything is downloaded
#
#########################################################
my $req         = HTTP::Request->new(HEAD=>$url);
my $ua          = LWP::UserAgent->new;
$ua->agent($agent);
my $resp        = $ua->request($req);
my $status_line = $resp->status_line;

# A failed HEAD request carries no usable headers; report the real
# reason rather than complaining about an empty Content-Type.
unless ($resp->is_success) {
	print STDERR "Unable to query $url: $status_line\n";
	exit 1;
}

my $type	= $resp->header('Content-Type');
my $size	= $resp->header('Content-Length');

# Either header may be absent from the response.
$type = '' unless defined $type;
my $size_known = (defined $size && $size =~ /\A\d+\z/) ? 1 : 0;

#########################################################
#
# Commify the output file size in Content-Length into 
# usable human-readable byte values
#
#########################################################
my $bsize	= "unknown";
if ($size_known) {
	my $bprecise = sprintf "%.0f", $size;
	$bsize = insert_commas($bprecise) . " bytes";
}

print "\n$agent\n" . "-"x39 . "\n";
print "Date...........: $date\n";
print "Content type...: $type\n";
print "Document size..: $bsize\n\n";

# The type is checked first, so a non-Word document is always reported
# as such regardless of its size.  A document whose size the server did
# not report cannot be compared against the limit and is still fetched.
if ($type !~ /msword/) {
	print "Document must be a Microsoft Word .doc or .rtf document. The document type\n";
	print "$type is not valid here. Please try again with a .doc or .rtf file.\n\n";
} elsif ($size_known && $size >= $max_docsize) {
	my $max_bsize = insert_commas($max_docsize);
	print "Document larger than allowed size ($max_bsize bytes), not fetching\n";
} else {
	if ($size_known) {
		print "Document smaller than the allowed size, proceeding with conversion..\n"; 
	} else {
		print "Document size not reported by the server, proceeding with conversion..\n";
	}
	convert_doc($md5file, $url);
}

#########################################################
#
# Insert thousands separators (commas) into the remote file's byte count
#
#########################################################
sub insert_commas {
	# Return the first argument with a comma between each group of three
	# digits, counted from the right-hand end of every digit run.  Digits
	# that follow a decimal point are left ungrouped.
	my ($number) = @_;

	# Grouping from the right is easiest on the reversed string: every
	# complete triple that still has a digit after it gets a comma.  The
	# negative look-ahead skips triples that are followed (in reversed
	# order) by a '.', i.e. the fractional part of the number.
	my $backwards = reverse $number;
	$backwards =~ s/(\d{3})(?=\d)(?!\d*\.)/$1,/g;

	my $commified = reverse $backwards;
	return $commified;
}

#########################################################
#
# Fetch and convert the document into Plucker format
# We use md5sum of the date here so that there is no 
# chance of clobbering any existing files or existing 
# directories on the host system.
#
#########################################################
sub convert_doc {
	# Download $url into a private scratch directory below $workpath,
	# convert it to HTML with wvHtml, feed that HTML to plucker-build and
	# finally remove the scratch directory again.
	#
	# Arguments:
	#   $md5file - hex digest used as the scratch directory name and as
	#              the stem of the .doc/.html/.pdb file names
	#   $url     - location of the Word document to download
	#
	# Returns 1 when every step succeeded and 0 otherwise.  A failure is
	# reported on STDERR; the scratch directory is removed in both cases.
	my ($md5file, $url) = @_;

	my $jobdir	= "$workpath/$md5file";
	my $doc		= "$md5file.doc";
	my $html	= "$md5file.html";

	#################################################
	#
	# Truncate the output filename itself to 26
	# characters so Palm does not barf
	#
	#################################################
	my $trunc_md5file = substr($md5file, 0, 26);

	my @wvargs	= ("--targetdir=$jobdir",
			   $doc,
			   $html);

	my @plargs	= ('-H',
			   "file:$jobdir/$html",
			   '--maxdepth=1',
			   "--bpp=$bpp",
			   "--$compression-compression",
			   '-V0',
			   '-f',
			   "$workpath/$trunc_md5file");

	mkpath([$jobdir], 0, 0711);

	# Run the steps in order and stop at the first one that fails; each
	# later step depends on the output of the one before it.
	my $error;
	my $status = getstore($url, "$jobdir/$doc");

	if (!is_success($status)) {
		$error = "unable to fetch $url (HTTP status $status)";
	} elsif (!chdir $jobdir) {
		# wvHtml is handed file names relative to the scratch directory.
		$error = "unable to chdir to $jobdir: $!";
	} elsif (system($wv_bin, @wvargs) == -1) {
		$error = "unable to run $wv_bin: $!";
	} elsif (!-s "$jobdir/$html") {
		# plucker-build reads this file, so it must exist and be non-empty.
		$error = "$wv_bin did not produce $jobdir/$html";
	} elsif (system($pb_bin, @plargs) != 0) {
		$error = "$pb_bin failed (wait status $?)";
	}

	# Step out of the scratch directory before deleting it so the process
	# is not left sitting in a directory that no longer exists.
	chdir $workpath;
	rmtree([$jobdir], 0, 1);

	if (defined $error) {
		print STDERR "Conversion failed: $error\n";
		return 0;
	}

	print "Don't forget to sync $trunc_md5file.pdb to your Palm handheld!\n\n";
	return 1;
}

#
# Fin. (c) 2002, David A. Desrosiers
