package Bio::Genex::GeneXML::DOMParser;
use strict;
use vars qw(@ISA $DEBUG $AUTOLOAD $ERROR_HANDLER);
use Carp;
use XML::Xerces;
use File::Basename;
use Class::ObjectTemplate 0.2;
require Exporter;

$DEBUG = 1;
@ISA = qw(Class::ObjectTemplate Exporter);
BEGIN {
  attributes qw(error_handler
		validate
		namespace
		xml_decl_node
		input_source
		_doc
		_parser
		entity_ref_node);
}

sub initialize {
  my $self = shift;
  my $parser = XML::Xerces::DOMParser->new();
  unless (defined $self->error_handler && !$self->error_handler) {
    # add a default error handler
    $ERROR_HANDLER = XML::Xerces::PerlErrorHandler->new();
    $parser->setErrorHandler($ERROR_HANDLER);
  }

  # we validate, handle namespaces, and create XMLDecl nodes and
  # EntityReference nodes by default
  $parser->setDoValidation (1)
    unless defined $self->validate && !$self->validate;

  $parser->setDoNamespaces (1)
    unless defined $self->namespace && !$self->namespace;

  $parser->setToCreateXMLDeclTypeNode (1)
    unless defined $self->xml_decl_node && !$self->xml_decl_node;

  $parser->setCreateEntityReferenceNodes(1)
    unless defined $self->entity_ref_node && !$self->entity_ref_node;


  $self->_parser($parser);
  return $self;
}

# a convenience function so that we don't have to create all the sources
# ourselves ever time
sub parse {
  my $self = shift;
  my $source = shift;

  # check if we've been passed an input source object
  unless (ref($source)) {
    # it wasn't so put it back on the parameter stack
    unshift(@_,$source);
    my %args = @_;
    if (exists $args{membuf}) {
      $source = XML::Xerces::MemBufInputSource->new($args{membuf},'fakeid');
    } elsif (exists $args{file}) {
      $source = XML::Xerces::LocalFileInputSource->new($args{file});
    } elsif (exists $args{url}) {
      my $url = XML::Xerces::XMLURL->new($args{url});
      $source = XML::Xerces::URLInputSource->new($url);
    } elsif (exists $args{stdin}) {
      $source = XML::Xerces::StdInInputSource->new();
    } else {
      error(caller=>'Bio::Genex::GeneXML::DOMParser::parse',
	    message=>"Unknown Source type: " . join(' ', keys %args));
    }
  }
  # store the input source for later reference
  $self->input_source($source);
  return $self->_parser->parse($source);
}

# we make sure that we return a GeneXML subclass
sub get_genexml_document {
  my $self = shift;
  my $doc = $self->_doc();
  unless (defined $doc) {
    my $class = shift;
    $class = 'Bio::Genex::GeneXML' unless defined $class;
    $doc = $self->_parser->getDocument();
    $doc = $class->new(_doc=>$doc,_parser=>$self);
    # set the input source for the document
    $doc->input_source($self->input_source);
    $self->_doc($doc);
  }
  return $doc;
}

sub AUTOLOAD {
  my $self = shift;
  my $method = $AUTOLOAD;
  $method =~ s/.*://;		# strip fully-qualified portion

  # if we couldn't handle it, we'll assume the XML::Xerces::DOMParser can
  if ($self->_parser->can($method)) {
    no strict 'refs';
    return $self->_parser->$method(@_);
  } else {
    error(caller=>join(':',(caller())[1,2]),
	  message=>"Called $method in GeneXML::DOMParser::AUTOLOAD");
  }
}

sub DESTROY {
  my $self = shift;
  # get rid of potential circular references
  $self->_doc(undef);
}




My applications simply look like:

# create a parser, parser the file, and get the document
my $PARSER = Bio::Genex::GeneXML::DOMParser->new();
$PARSER->parse(file=>$OPTIONS{file});
my $DOC = $PARSER->get_genexml_document();


# now get the list of contacts
my @contact_list = $DOC->getElementsByTagName('contact');
my @con_dbs;
foreach my $con_node (@contact_list) {
  push(@con_dbs,Bio::Genex::Contact->xml2db(doc=>$DOC,node=>$con_node))
}

The xml2db method takes a bunch of <contact> elements and converts
them to Bio::Genex::Contact objects:

package Bio::Genex::Contact;
sub xml2db {
  my ($DOC,$con_node,$con_id,$ID_TABLE,$con_db) =
    Bio::Genex::XMLUtils::xml2db(@_);
  return $con_db if defined $con_db;

  my @types = split(' ', $con_node->getAttribute('types'));
  error(caller=>__PACKAGE__ . '::xml2db',
	message=>"Couldn't find type for node: " . $con_node->serialize())
    unless scalar @types;
  my $name = $con_node->getAttribute('contact_person');
  $name = $con_node->getAttribute('organization') unless $name;
  error(caller=>__PACKAGE__ . '::xml2db',
	message=>"Must define either 'contact_person' or 'organization': " 
	. $con_node->serialize()) unless $name;

  my %attrs = $con_node->getAttributes();

  # get rid of non-object attributes
  delete @attrs{qw(types security_id id)};
  # set the pkey attr to be the XML id string
  $attrs{con_pk} = $con_id;

  # add the new contact object to the ID lookup table
  $con_db = Bio::Genex::Contact->new(%attrs);
  $ID_TABLE->{"Contact:$con_id"} = $con_db;

  # set the ContactType entries
  my $ct_db = Bio::Genex::ContactType->xml2db(doc=>$DOC,node=>$con_node);
  $con_db->contacttype_obj($ct_db);

  my $co_db = Bio::Genex::ContactOwner->xml2db(doc=>$DOC,id=>$con_id);
  $con_db->contactowner_obj($co_db);

  return $con_db;
}


Here's an example piece of xml from the file showing a <contact>
entry: 

<contact
  id="contact_appliedprecision"
  types="software_image_analysis"
  organization="Applied Precision, Inc."
  org_mail_address="Applied Precision, Inc. 
                1040 12th Avenue Northwest
                Issaquah, Washington 98027
                USA"
  org_phone="+1 (425) 557-1000"
  org_fax="+1 (425) 557-1000"
  org_email="saleshotline@api.com"
  url=""
  security_id="security_public"
/>

I don't know if this more confusing than it's worth, but you asked.

jas.

