#
# manage_attributes.pl
#
# A perl script that manages access to defaults.xml
#
# Part of the ht://Dig package   <http://www.htdig.org/>
# Copyright (c) 1995-2000 The ht://Dig Group
# For copyright details, see the file COPYING in your distribution
# or the GNU Public License version 2 or later
# <http://www.gnu.org/copyleft/gpl.html>
#
# $Id$
#
#
# API DOCUMENTATION
# ------------------------------------------------------
#
# This set of perl functions is used to manage access to
# default.xml. 
#
#  LoadAttributeData( [$doSmallLoad:boolean] ): boolean
#
#     This function will attempt to load in the data
#     from default.xml. If it encounters any errors
#     it will write those to STDERR and then return FALSE,
#     otherwise it will simple return TRUE.
#
#     By default it will load a complete set of data - 
#     if it passed a TRUE value it will only load the
#     minimal ammount of data required to generate defaults.cc
#
#     This function must be called before any of the other
#     functions in this file will work.
#
#  GetAttributeNames() : list
#
#     This function will return a list of all the attribute
#     names, in the order they are specified in defaults.xml
#
#  
#  IsValidAttributeName( $name : string ) : boolean
#
#     This function will return TRUE if $name represents
#     a valid attribute name, otherwise it will return 
#     false.
#
#  GetAttributeData( $name : string ) : reference to hash
#
#     If $name does not represent a valid attribute this
#     function will return undef. If it does, it will 
#     return a reference (pointer) to a hash. This
#     hash will always contain the following keys:
#
#      name    : string  = attribute name
#      default : string  = default value of the attribute
#      macro   : boolean = TRUE if the default value is a C Macro
#
#     If the LoadAttributeData was not called with TRUE then
#     it will also contain the following:
# 
#      type     : string = attribute type
#      programs : reference to list of strings = list of programs it appears in
#                 ( can be just [ "all" ] )
#      version  : string = version of ht://dig in which the attribute
#                 first appeeared, or "all" if it has always been there
#      category : string = category
#      block    : string = scope block
#      nodocs   : boolean = TRUE if this attribute is not to be 
#                 included in the documentation
#      programs : reference to list of strings = list of example values to 
#                 include ( if there are no example, it will contain a 
#                 reference to an empty list )
#
#  GetHTMLDescription( $name : string,
#                      $reffunc : string,
#                      $codeblockfunc : string ) : text
#
#      This function will return the description for
#      attribute $name rendered as HTML. The other two 
#      parameters are the names of function to be
#      to render bits of it:
#
#         $reffunc : this is the name of a function 
#            to generate links from <ref> elements.
#            It should be of the form:
#
#              RefFunc( $type : string,
#                       $name : string ) : text;
#  
#            where $type will be one of "faq", "attr" or 
#            "program" and $name will the link data. It 
#            should return an <a> element that links
#            the right place.
#
#         $codeblockfunc : this is the name of the 
#            function used to render <codeblock> 
#            elements as HTML. It should be of the form:
#            
#              RefFunc( $content : text ) : text;
#  
#            where $content is the content of the 
#            codeblock element. It should return a 
#            snippet of HTML.
#
# This entire set of function is implented using simple 
# perl text matching - the aim is that at some point 
# the under the bonnet stuff can be switched over using 
# a validating XML parser at some point.
#
#-----------------------------------------------------------------------------

# the XML file and the DTD are expected to be in the
# same directory as this file

$thisdir = __FILE__;
if ( ! ( $thisdir =~ m|/| ) ){ $thisdir = "./" . $thisdir; }
$thisdir =~ s|/[^/]+$||g;

$DEFAULT_XML = "$thisdir/defaults.xml";
$DEFAULT_DTD = "$thisdir/defaults.dtd";

$loadType = "";

#---------------------------------------------------------
# This is filter that takes each line or input in turn
# and filters out all XML comments of the form <!-- -->
#
# It has one piece of memory  - namely
# whether or no it was in a comment when it was last called.

sub filter_comments
{
   local( $line ) = @_;

   if ( $incomt )
   {
      if ( $line =~ m/-->/ )
      {
         #Ok. We are in a comment and we have an end of
         #comment marker detected. We clean out to that
         #point
         $line =~ s/^.*-->//;
         $incomt = 0;
      }
      else
      {
         # We are in a comment and it hasn't finished
         return "";
      }
   }

   #If the program gets to here then the start of the
   #line is outside of any comments

   #Remove and complete comments
   $line =~ s/<!--.*-->//g;

   #The only thing that could be left now that is 
   #an issue is the start of a new comment

   if ( $line =~ s/<!--.*\n?// ){ $incomt = 1 };
   
   return $line;
}

#--------------------------------------------------------
# This takes each line in turn that has been through
# the comment filter. What it does is to collect
# together an entire "attribute" element as one 
# lump of text and then process it in one hit 
# through the process_attr function

sub process_line
{
   local( $line , $loadAll ) = @_;

       while ( $line )
   {
      if ( $inattr )
      {
         if ( $line =~ m|^(.*</attribute>)(.*\n?)$| )
         {
             push( @attrbucket, $1 );
             $line = $2;
             $inattr = 0;
             &process_attr( join( "", @attrbucket ) , $loadAll);
             @attrbucket = ();
         }
         else
         {
             push( @attrbucket, $line );
             $line = "";
         }
      }
      else
      {
         if ( $line =~ m|^.*(<attribute.*\n)$| )
         {
             $line = $1;
             if ( $line =~ m|(.*</attribute>)(.*\n?)| )
             {
                # This covers off the possibility of a whole attribute
                # in one line. This is in the "unlikely, but legal"
                # category

                $line = $2;
                &process_attr( $1 , $loadAll );
             }
             else
             {
                push( @attrbucket, $line );
                $inattr = 1;
                $line = "";
             }
         }
         else
         {
             $line = "";
         }
      }
   }
}


sub resolve_entities
{
   local( $text ) = @_;
   
   $text =~ s/&lt;/</g;
   $text =~ s/&gt;/>/g;
   $text =~ s/&apos;/'/g;
   $text =~ s/&quot/"/g; #"
   $text =~ s/&amp;/&/g;
   
   return $text;
}   
       
sub process_attr
{
   local( $attr , $loadAll ) = @_;
   local( $name, $value );
   local( $data ) = {};
   
   local( @xmlattrs ) = ( "name" );
   local( %optattrs ) = ();
   
   if ( $loadAll )
   {
      @xmlattrs = ( "name", "type", "programs", "version", "category" , "block" );
      $optattrs{ "block" } = "Global";
   }
   
   #if ( $attr =~ m/<attribute[^>]+\s+name\s*=\s*[\"\']([^\"\']+)/ )
   foreach $xan ( @xmlattrs )
   {
      if ( $attr =~ m/<attribute[^>]*?\s+$xan\s*=\s*[\"\']([^\"\']*)/ )
      {
         $data->{$xan} = $1;
      }
      elsif ( defined( $optattrs{$xan} ) )
      {
         $data->{$xan} = $optattrs{$xan};
      }
      else
      {
         die "Unable to extract a $xan for attribute";
      }
   }
   
   $name = $data->{"name"};
   
   if ( $data->{"programs"} )
   {
       local( $progs ) = $data->{"programs"};
       
       $progs =~ s/^\s+//; # Remove leading ..
       $progs =~ s/\s+$//; # and trailing whitespace
       
       local( $proglist ) = [];
       push( @$proglist, split( /\s+/, $progs ) );
       
       $data->{"programs"} = $proglist;
   }
   elsif( $loadAll )
   {
       $data->{"programs"} = []; # Set it to empty list
   }

   if ( $attr =~ m/<default(?:\s+configmacro\s*=\s*[\"\']([^\"\']+)[\"\']\s*)?>(.*)<\/default>/ )
   {
      $data->{"macro"}   = ( $1 eq "true" ? 1 : 0 );
      $data->{"default"} = &resolve_entities( $2 );
   }
   else
   {
      die "Unable to get a value for attribute $name";
   }
   
   if ( $loadAll )
   {
      $data->{"nodocs"} = 0;
      
      if ( $attr =~ m|<nodocs/>| )
      {
         $data->{"nodocs"} = 1;
      }
      else
      {
      
         # -----------------------------------------------------------
         # Extract examples - if there are no examples it 
         # will be set to an empty list
         
         local( @egbits ) = split( /<example>/, $attr );
         local( $eglist ) = [];
         
         shift @egbits; # The first item is everything up to the first example,
                        # which we don't want
                        
         foreach ( @egbits )
         {
            s|</example>(.*\n?)*||; 
            push( @$eglist, &resolve_entities($_) );
         }
         
         $data->{"examples"} = $eglist;

         # -----------------------------------------------------------
         # Extract description - if there are no examples it 
         # will be set to an empty list

         #print $attr;
         if ( $attr =~ m|<description(?:\s+badxml\s*=\s*[\"\']([^\"\']+)[\"\']\s*)?>(.*)</description>|s )
         {
             $data->{"description"} = $2;
         }
         
      }
   }
   
   push( @attributeNames, $name );
   $attributeTable{ $name } = $data;
}


sub GetAttributeNames
{
    return @attributeNames;
}

sub IsValidAttributeName
{
    local ($varname) = @_;
    return defined( $attributeTable{ $varname } );
}

sub GetAttributeData
{
    local ($varname) = @_;
    return $attributeTable{ $varname };
}

sub GetHTMLDescription
{
    local( $varname, $reffunc, $codeblockfunc ) = @_;

    local( $desc ) = $attributeTable{ $varname }->{"description"};

    # turn "<br/>" into "<br>";
    $desc =~ s|<br\s*/>|<br>|g;

    # Generate reference links
    $desc =~ s|<ref\s+type\s*=\s*[\"\']([^\"\']*)[\"\']\s*>([^<]*)</ref>|&$reffunc($1,$2)|eg;
    
    # Generate codeblocks
    $desc =~ s|<codeblock>\s*(.*)\s*</codeblock>|&$codeblockfunc($1)|esg;
    
    return $desc;
}

sub LoadAttributeData
{
   local( $doSmallLoad ) = @_;
   local( $loadAll ) = ( $doSmallLoad ? 0 : 1 );
   

   # If we have already loaded the appropriate data
   return 1 if ( $loadType eq "large" );
   return 1 if ( $doSmallLoad && ($loadType eq "small" ) );

   if ( ! open( XMLFP, "<$DEFAULT_XML" ) )
   {
      print STDERR "Unable to open $DEFAULT_XML for reading";
      return 0;
   }

   eval
   { 
      $incomt = 0;
      $lineno = 0;
      @attrbucket = ();
      @attributeNames = ();
      %attributeTable = ();
      
      while( <XMLFP> )
      {
         $line=$_;
         $lineno++;
         
         $line = &filter_comments( $line );
         &process_line( $line , $loadAll );
      }

      close( XMLFP );
   };
   if ( $@ )
   {
       print STDERR "Error at line $lineno of $DEFAULT_XML : $@";
       return 0;
   }

   return 1;
}

1;
