package ReadAD;

# vim:ts=3 sw=3 ai:

require 5.005_62;
use strict;
use warnings;
use Carp;

use fields;

require Exporter;

our @ISA = qw(Exporter);

# Items to export into callers namespace by default. Note: do not export
# names by default without a very good reason. Use EXPORT_OK instead.
# Do not simply export all your public functions/methods/constants.

our @EXPORT = qw(
	ReadAD
);
our $VERSION = '0.01';


# Preloaded methods go here.

# sub ReadAD
#
# this subroutine takes 4 mandatory named args
# dn            - the dn to start reading in the directory
# file          - the filename to write the directory to
#                 this file will be overwritten
# callback      - a callback function to call for each line of real data
#                 this gets passed the filename and a psuedo hash
#                 containing key, value pairs of the actual data
#                 in the csv file.
# attributes    - the field names that you want to read from AD
#                 *** NOTE you should ensure the case of the field
#                 *** names you pass is the same as csvde returns
#
# the following args are optional
# usecache      - if 0 (the default), then ReadAD() exports the data
#                 from AD before processing the resultant output file.
#                 if 1, ReadAD() re-reads the existing file, so long
#                 as it is less than 1 day old. If the file is greater
#                 than 1 day old, ReadAD() refreshes the file from AD.
#                 
# returns       - a reference to a hash, where each element of the 
#                 hash is a key, value pair returned by your callback
#                 routine

sub ReadAD {
	my(%options)=(
		usecache	=> 0,
		filter	=> "(objectClass=*)",
		@_,
	);

	my($usage)="usage:
	dn          => directory path (mandatory)
	file        => <filename> (mandatory)
	callback    => reference to a callback function (mandatory)
	attributes  => attributes (array reference) (mandatory)
	filter		=> optional (ldap filter to apply)
	usecache    => BOOL - use output from last run if less than 6 hour old
	               (optional, default = 1)
";

	# concatenate @attrs together suitable for passing to csvde
	my($attrs)=join(",", @{$options{'attributes'}});

	for my $arg (qw(dn file callback attributes)) {
		croak "ReadAD(): mandatory argument ($arg) missing\n$usage"
			unless (exists $options{$arg});
	}

	if (($options{'usecache'} == 1) && (-f $options{'file'} && -M "$options{'file'}" < 0.25)) {
		print "\n*** USING CACHED LDIFDE FILE '$options{'file'}' ***\n\n";
	}
	else {
		print "Running LDIFDE directory export to file '$options{file}' ...\n";
		unlink("$options{'file'}");
		open(LDIFDE, qq(ldifde -s $ENV{'COMPUTERNAME'} -f "$options{'file'}.new" -d "$options{'dn'}" -n -l "$attrs" -r "$options{'filter'}" 2>&1 |))
			or die "Can't run LDIFDE, $^E\n";

		while (<LDIFDE>) {
			next if /^\s*$/;
			next if /^Connecting to "$ENV{'COMPUTERNAME'}"$/;
			next if /^Logging in as current user using SSPI$/;
			next if /^Exporting directory to file $options{'file'}.new$/;
			next if /^Searching for entries\.\.\.$/;
			next if /^The command has completed successfully$/;
			next if /^\d+ entries exported$/;

			print ">	$_" unless /^Writing out entries/;
			if (/No Entries found/) {
				warn "LDIFDE: no entries found for $options{'dn'}\n";
				return undef;
			}
		}

		close(LDIFDE);
		rename("$options{'file'}.new", "$options{'file'}");
		
		print "Done running LDIFDE directory export to file '$options{'file'}'\n\n";
	}

	# return whatever parse_ldifde returns
	return &parse_ldifde($options{'dn'}, $options{'file'}, $options{'callback'}, @{$options{'attributes'}});
}

# sub parse_ldifde
# $file      - the file containing the data
# $callback  - see documentation above
# @attrs		 - the attributes requested - not necessarily the same
#              as the attribute returned
#
# returns    - a reference to a hash, where each element of the hash
#              is a key/value pair returned by your callback routine
# 		
# 		the input file is expected to be in the format dumped by a run
# 		of csvde. As such, it will contain a header line and then 0 or
# 		more lines of data. For each data line, a psuedo-hash is created
# 		containing the data. Each field of the psuedo-hash has the name
# 		of the header field and the corresponding data as its value.
# 		This data is passed to the callback routine for checking and
# 		if necessary modifying

sub parse_ldifde {
	my($dn, $file, $callback, @attrs)=@_;

	my($records)=0;
	my(%ATTRS)=();
	my(%attrhash);

	@attrhash{@attrs}=();
	local($/)="";

	print "Reading file '$file' exported from AD ...\n";
	open(LDIF, "<$file")
		or die "Can't open $file for reading\n";
	
	my($entry);
	#$entry=<LDIF>;		# grab header line
	#
	#unless ($entry =~ /^dn: $dn$/mi) {
	#print "Format of $file is wrong - can't continue\n$entry$dn";
	#return undef;
	#}
	
	local($")="|";
	while ($entry = <LDIF>) {
		$records++;
		my(@data)=split(/^(\w+):\s+/m, $entry);

		# get rid of initial null entry
		shift(@data);

		# chomp the remainder
		chomp(@data);

		if (($#data % 2) != 1) {
			die "Entry has too few fields\n$entry\n";
		}
		my($key, $value);

		my($phash)=fields::phash([@attrs], []);

		while ($#data > 0) {		# only continue if we have at least two fields
			$key=shift @data;
			$value=shift @data;

			next unless exists $attrhash{$key};

			# we're not interested in the 'changetype' psuedo attribute
			next if $key eq "changetype";
			#print "got $key, $value for $phash->{cn}\n" if $key eq "extensionAttribute10";

			$value=~s/\n\s+//mg;	# remove any embedded newlines

			# if the entry already exists then its either an
			# array or plain entry - check this out and do the
			# right thing
			if (exists $phash->{$key}) {
				if (ref($phash->{$key}) eq "ARRAY") {
					push(@{$phash->{$key}}, $value);
				}
				else {
					# turn existing entry into an array
					$phash->{$key}=[$phash->{$key}, $value];
				}
			}
			else {
				$phash->{$key}=$value;
			}
		}

		if (defined $callback) {
			($key, $value)=&$callback($file, $phash);
		}
		else {
			($key, $value)=($phash->{$attrs[0]}, $phash);
		}

		if (! defined $key || $key =~ /^\s*$/) {
			print "*** ReadAD(): cowardly refusing to create an entry with a null key\nThe original entry was\n$entry\n";
			next;
		}
		$ATTRS{$key}=$value;
	}

	close(LDIFDE);

	print "Done reading file '$file'.\n";
	print "Read $records records read from AD export file '$file'\n\n";

	return \%ATTRS;
}

1;
__END__
=head1 NAME

ReadAD - Perl extension to dump a DN from AD using the fairly efficient ldifde 
and return the data in a hash reference

=head1 SYNOPSIS

use ReadAD;

ReadAD(dn => $DN, file => $TempFileName, callback => $callback,
       attributes => @attributes, [ usecache => BOOL ]);

=head1 DESCRIPTION

ReadAD() takes the following named arguments (e.g. key/value pairs):-

=over

=item

dn       - the dn to start reading in the directory

=item

file     - the filename to write the directory to this file will be overwritten

=item

callback - a callback function to call for each line of real data
            this get passed the DN, filename and a psuedo hash
            containing key, value pairs of the actual data
            in the csv file.

=item

attributes    - the field names that you want to read from AD
            *** NOTE you should ensure the case of the field
            *** names you pass is the same as csvde returns. This should
				be passed as a reference to an array containing the field names.

=item

usecache   - (optional) a BOOL saying whether to read the data from AD or
             not. If set to '1' but the existing data file is older than
				 1 day, ReadAD() sets this to 0 and re-extracts the data.

=item

returns   - a reference to a hash, where each element of the 
            hash is a key, value pair returned by your callback
            routine

=back

ReadAD() attempts to dump directory information from the active directory
starting from the supplied DN. It uses the running processes
security context to access the directory. The directory is read by first
running LDIFDE using the directory name and attributes (@attr) to produce
an output file (specified by $file). This output file is then parsed. For each
entry read, ReadAD produced a perl-psuedo hash containing the keys and values
for the attributes requested. Where no attributes were returned from AD, the
corresponding values will be undef. For attributes where multiple values are
returned, the values will be coerced into array references. (Note that
this is a per-entry coercion. If the data for one entry only has one
value for a given attribute, it will return a scalar. For the same attribute
in a different entry where multiple values are returned, the value will
be an array reference).

Once each entry has been read, the supplied callback validation function is
called. This is supplied with $dn and the psuedo-hash, $phash with fields
corresponding to the attribute names supplied in @attrs, for the entry. This
callback function can then check the data for any errors and can modify it
should the need arise. It should return an array consisting of key and value to the called (undef on error) - these will normally be a unique field (e.g. cn)
from the $phash and the supplied $phash.

=head2 RETURNS

If successful, ReadAD() returns a hash reference whose keys and and values
returned from each call to the validation function. On failure, undef
is returned.

=head2 EXPORTS

ReadAD()

=head1 AUTHOR

Bret Giddings <bret@essex.ac.uk>

=head1 SEE ALSO

perl(1), documentation for fields module (perldoc fields).

=cut