#!/usr/bin/perl

$attrdir=$ARGV[0];

$curr = "";
@bucket = ();

sub bkt_default
{
    local( $attrname, $text ) = @_;
    
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    
    print ">$text";
}

sub bkt_example
{
    local( $attrname, $text ) = @_;

    $text =~ m/^\s*(.*)\s*:/;    
    local( $attr ) = $1;
  
    #Remove leading attribute name 
    $text =~ s/^\s*$attr\s*:\s*//;

    $text =~ s|\s*$attr\s*:\s*|\n     </example>\n     <example>|g;

    $text =~ s/<br>//g;
    
    print ">$text";
}

sub bkt_description
{
    local( $attrname, $text ) = @_;

    local( $altdesc ) = "$attrdir/attr.$attrname"; 
    if ( -r $altdesc )
    {
        print STDERR "using $altdesc\n";
        open( XXFP, "<$altdesc" );
        $text = join( "", <XXFP> );
        close( XXFP );
    }
    
    
    $text =~ s/(?:<em>|<strong>\s*)?<a\s+href\s*=\s*\"#([^\"]+)\"\s*>\s*(?:<em>|<strong>)?[^<]+(?:<\/em>|<\/strong>)?\s*<\/a>(?:\s*<\/em>|<\/strong>)?/<ref type=\"attr\">$1<\/ref>/ig;
    $text =~ s/(?:<em>|<strong>\s*)?<a\s+href\s*=\s*\"FAQ.html#q([^\"]+)\"\s*>\s*(?:<em>|<strong>)?[^<]+(?:<\/em>|<\/strong>)?\s*<\/a>(?:\s*<\/em>|<\/strong>)?/<ref type=\"faq\">$1<\/ref>/g;
    $text =~ s/(?:<em>|<strong>\s*)?<a\s+href\s*=\s*\"(ht[a-z]+).html\"[^>]*>\s*(?:<em>|<strong>)?[^<]+(?:<\/em>|<\/strong>)?\s*<\/a>(?:\s*<\/em>|<\/strong>)?/<ref type=\"program\">$1<\/ref>/g;

    $text =~ s/<br>/<br\/>/ig;

    # insert missing </li> tags
    $text =~ s#(<li>[^<]+)(?=<(?:li|/ul|/ol)>)#$1</li>#ig;
    $text =~ s#(\s+)</li>(<li>|</ol>|</ul>)#</li>$1$2#ig;

    # replace <tt> with <code>
    
    $text =~ s|</tt>|</code>|g;
    $text =~ s|<tt>|<code>|g;
    
    # fix some dt problems
    $text =~ s/<dd>(\s*(?:<dt>|<\/dl>))/<\/dd>$1/ig;
    $text =~ s|</dd>(\s*)(</td>)|<\/dd>$1  </dl>$1$2|ig;
    
    
    $text =~ s/<dl\s*compact\s*>/<dl compact="true">/ig;

    $text =~ s#(<p>(?:[^<]*<(?:br/|em|/em)>)*[^<]+)(<table)#$1</p>\n$2#g;
    
    $text =~ s/&nbsp;/ /g;
    # fix 

    $text =~ s/<timed\s+out>/&lt;timed out&gt;/;
    $text =~ s/([^-]-)>/$1&gt;/;

    # We create a test version of the text and do some tests on it
    local( $testtxt ) = $text;

    # Strip comments
    $testtxt =~ s/<!--.*-->//g;
    
    # Strip all entities 
    $testtxt =~ s/&lt;//g;
    $testtxt =~ s/&gt;//g;
    $testtxt =~ s/&amp;//g;
    $testtxt =~ s/&quot;//g;
    $testtxt =~ s/&apos;//g;

    # Remove all <br> tags
    $testtxt =~ s|<br/>||g;

    # Remove simple ref tags
    $testtxt =~ s|<ref[^>]+>[^<&]*</ref>||g;
    $testtxt =~ s|<em>[^<&]*</em>||g;
    $testtxt =~ s|<strong>[^<&]*</strong>||g;
    $testtxt =~ s|<a\s+href="[^"]*"[^>]*>[^<&]*</a>||g; #"
    $testtxt =~ s|<p>[^<&]*</p>||g;
    $testtxt =~ s|<code>[^<&]*</code>||g;
    $testtxt =~ s|<codeblock>[^<&]*</codeblock>||g;

    # Remove simple lists
    $testtxt =~ s|<[ou]l>\s*(<li>[^<&]*</li>\s*)+</[ou]l>||g;

    # Remove simple deflidsts
    $testtxt =~ s|<dl(?:\s+compact="true"\s*)?>\s*(<dt>[^<&]*</dt>\s*<dd>[^<&]*</dd>\s*)+</dl>||g;

    #Remove Table bits
    $testtxt =~ s|<table(?:\s*border="[01]")?\s*>\s*(?:<tr>\s*(?:<t([hd])(\s*rowspan="[0-9]+")?(\s*valign="top")?\s*>[^<&]*</t\1>\s*)+</tr>\s*)+</table>\s*||g;

    if ( $testtxt =~ m/[\<\>\&]/ )
    {
       print " badxml=\"yes\"";
       print STDERR "\n=======================================================> $attrname\n";
       print STDERR $testtxt;

    }

    print ">$text";
}

while( <STDIN> )
{
    $line = $_;
    while ( $line ne "" )
    {
        if ( $curr )
        {
            if ( $line =~ m|^(.*)</$curr>(.*\n?)$| )
            {
                push( @bucket, $1 );
                $func="bkt_$curr";
                &$func( $attrname, join( "", @bucket ) );
                print "</$curr>\n";
                $line = $rest;
                $curr = "";
            }
            else
            {
                push( @bucket, $line );
                $line = "";
            }
        }
        else
        {
            if ( $line =~ m/<attribute\s+name="([^"]+)/ )
            {
                $attrname=$1;
            }
                
            if ( $line =~ m/^(.*)<(default|example|description)([^>]*)>(.*\n?)$/ )
            {
                print "$1<$2$3";
                @bucket = ();
                $curr=$2;
                $line=$4;
            }
            else
            {
                print $line;
                $line = "";
            }
        }
    }
}   
    
