# the capital letter A with diaeresis (Ä, UTF-8 bytes C3 84) as an example

# Check test #5 particularly!

use strict;
use warnings;

use Test::More ('no_plan');

use_ok( "XML::LibXML" );

use constant TESTFILE => "test.xml";

# Round trip through toString() and parse_string() for a document whose
# encoding is set to 'utf8'.  The raw string is stored in attribute "foo";
# after serialising and re-parsing, the attribute has to equal the
# encodeToUTF8() form of that string.
#
# Returns 1 on success; on any failure a warning is emitted and 0 is returned.
sub Test_String_IO_UTF8_Encoding {
    my $in  = "\x{C3}\x{84}";                 # stored on the attribute as-is
    my $in2 = encodeToUTF8("utf8", $in);      # value the final comparison expects

    # Build a document with a single <test> element carrying attribute "foo".
    my $source  = XML::LibXML::Document->new();
    my $element = XML::LibXML::Element->new('test');
    $element->setAttribute( "foo", $in );
    $source->setDocumentElement($element);

    # The encoding is set explicitly before serialising.
    $source->setEncoding('utf8');
    my $serialized = $source->toString(0);

    # Parse the serialised text back; an exception from the parser is trapped
    # by the eval and reported below.
    my $parser   = XML::LibXML->new();
    my $reparsed = eval { $parser->parse_string( $serialized ) };

    if ( $@ ) {
        warn "Parser Error detected: $@\n";
        return 0;
    }

    if ( not defined $reparsed ) {
        warn "no document was parsed\n";
        return 0;
    }

    my $root = $reparsed->getDocumentElement();
    if ( not defined $root ) {
        warn "no documentElement found in the document\n";
        return 0;
    }

    my $attrstring = $root->getAttribute( "foo" );
    if ( not defined $attrstring ) {
        warn "no attribute was found\n";
        return 0;
    }

    if ( $attrstring ne $in2 ) {
        warn "attrstring equals not the original string ('$attrstring' != '$in')\n";
        return 0;
    }

    return 1;
}

ok( Test_String_IO_UTF8_Encoding(),
    "toString() WF test on a UTF8 encoded document" );

# Same round trip as the UTF8 string test, but the document encoding is set
# to 'iso-8859-1' before toString() is called.  The raw string goes into
# attribute "foo"; the re-parsed attribute is compared with the
# encodeToUTF8() form of that string.
#
# Returns 1 on success; on any failure a warning is emitted and 0 is returned.
sub Test_String_IO_latin1_Encoding {
    my $in  = "\x{C3}\x{84}";                 # stored on the attribute as-is
    my $in2 = encodeToUTF8("utf8", $in);      # value the final comparison expects

    # Assemble the one-element document.
    my $source  = XML::LibXML::Document->new();
    my $element = XML::LibXML::Element->new('test');
    $element->setAttribute( "foo", $in );
    $source->setDocumentElement($element);

    # Serialise with an explicitly chosen Latin-1 encoding.
    $source->setEncoding('iso-8859-1');
    my $serialized = $source->toString(0);

    # Read the serialised text back in; parser exceptions land in $@.
    my $parser   = XML::LibXML->new();
    my $reparsed = eval { $parser->parse_string( $serialized ) };

    if ( $@ ) {
        warn "Parser Error detected: $@\n";
        return 0;
    }

    if ( not defined $reparsed ) {
        warn "no document was parsed\n";
        return 0;
    }

    my $root = $reparsed->getDocumentElement();
    if ( not defined $root ) {
        warn "no documentElement found in the document\n";
        return 0;
    }

    my $attrstring = $root->getAttribute( "foo" );
    if ( not defined $attrstring ) {
        warn "no attribute was found\n";
        return 0;
    }

    if ( $attrstring ne $in2 ) {
        warn "attrstring equals not the original string ('$attrstring' != '$in')\n";
        return 0;
    }

    return 1;
}

ok( Test_String_IO_latin1_Encoding(),
    "toString() WF test on a Latin1 encoded document" );

# Round-trip test for toString() output that is written to TESTFILE by perl's
# own IO (through a ':utf8' layer) and then read back with parse_file().  The
# document encoding is set to 'utf8'.
#
# Returns 1 when attribute "foo" of the re-parsed document equals the value
# that was stored.  On any failure the scratch file is removed, a warning
# describing the failed step is emitted, and 0 is returned.
sub Test_String_Out_File_In_UTF8_Encoding {
    # [PERL BUG IN THIS TEST]
    my $in = "\x{C3}\x{84}"; # this might be corrupting

    # unless we encode the string properly, the test will fail,
    # because perl thinks it's not UTF8
    my $in2 = encodeToUTF8("utf8", $in);

    # Single failure path: remove the scratch file, say why, fail the test.
    my $fail = sub {
        my ($message) = @_;
        unlink TESTFILE;
        warn $message;
        return 0;
    };

    my $doc  = XML::LibXML::Document->new();
    my $node = XML::LibXML::Element->new('test');

    $node->setAttribute( "foo", $in2 );
    $doc->setDocumentElement($node);

    # explicitly set encoding
    #
    # if no encoding is set at all libxml2 (through XML::LibXML) will
    # dump only ASCII characters and character references rather than
    # real UTF8.  In order to get REAL UTF8 one has to use setEncoding
    # or set the encoding in the constructor.
    #
    $doc->setEncoding('utf8');
    my $xmlstring1 = $doc->toString(0);

    # warn "is UTF8" if utf8::is_utf8( $xmlstring1 );

    # Three-argument open on a lexical handle, with the result checked, so a
    # missing or unwritable file is reported here instead of surfacing later
    # as a confusing parser error.
    open( my $fh, '>', TESTFILE )
        or return $fail->( "cannot open " . TESTFILE . " for writing: $!\n" );

    # The test breaks if perl IO stores the scalar to a file handle

    # everything works again if the :utf8 layer is set on the file handle.
    # However, perl *encodes* the internal data to the user's locale.
    # This is most likely not correct.
    # In particular in cases where the locale is set to UTF8, this causes
    # double encoding (rather than simple output).

    # to see the effect and the test fail, just comment the next line.
    #
    # NOTE(review): whether this layer is needed depends on whether toString()
    # returns a character string or an already encoded byte string in the
    # installed XML::LibXML -- confirm against the module's documentation.
    binmode( $fh, ':utf8' );

    my $write_ok = print {$fh} $xmlstring1;
    my $close_ok = close($fh);    # buffered write errors surface at close
    unless ( $write_ok && $close_ok ) {
        return $fail->( "cannot write " . TESTFILE . ": $!\n" );
    }

    # ok now the testing
    my $parser = XML::LibXML->new();
    my $tdoc   = eval { $parser->parse_file( TESTFILE ) };
    if ( my $error = $@ ) {
        return $fail->( "Parser Error detected:\n $error\n" );
    }

    return $fail->( "no document was parsed\n" )
        unless defined $tdoc;

    my $de = $tdoc->getDocumentElement();
    return $fail->( "no documentElement found in the document\n" )
        unless defined $de;

    my $attrstring = $de->getAttribute( "foo" );
    return $fail->( "no attribute was found\n" )
        unless defined $attrstring;

    # Report the value that was actually stored and compared ($in2).
    return $fail->(
        "attrstring equals not the original string ('$attrstring' != '$in2')\n"
    ) unless $attrstring eq $in2;

    unlink TESTFILE;
    return 1;
}

ok( Test_String_Out_File_In_UTF8_Encoding(),
    "toString() WF test on a UTF8 encoded document (reads from a file)" );


# Round-trip test for toString() output that is printed to TESTFILE by perl
# and then read back with parse_file().  The document encoding is set to
# 'iso-8859-1'.
#
# Returns 1 when attribute "foo" of the re-parsed document equals the value
# that was stored.  On any failure the scratch file is removed, a warning
# describing the failed step is emitted, and 0 is returned.
sub Test_String_Out_File_In_Latin1_Encoding {
    my $in = "\x{C3}\x{84}"; # this might be corrupting

    # unless we encode the string properly, the test will fail,
    # because perl thinks it's not UTF8
    my $in2 = encodeToUTF8("utf8", $in);

    # Single failure path: remove the scratch file, say why, fail the test.
    my $fail = sub {
        my ($message) = @_;
        unlink TESTFILE;
        warn $message;
        return 0;
    };

    my $doc  = XML::LibXML::Document->new();
    my $node = XML::LibXML::Element->new('test');

    $node->setAttribute( "foo", $in2 );
    $doc->setDocumentElement($node);

    # explicitly set encoding
    $doc->setEncoding('iso-8859-1');

    # Three-argument open on a lexical handle, with the result checked, so a
    # missing or unwritable file is reported here instead of surfacing later
    # as a confusing parser error.
    open( my $fh, '>', TESTFILE )
        or return $fail->( "cannot open " . TESTFILE . " for writing: $!\n" );

    my $write_ok = print {$fh} $doc->toString(0);
    my $close_ok = close($fh);    # buffered write errors surface at close
    unless ( $write_ok && $close_ok ) {
        return $fail->( "cannot write " . TESTFILE . ": $!\n" );
    }

    # ok now the testing
    my $parser = XML::LibXML->new();
    my $tdoc   = eval { $parser->parse_file( TESTFILE ) };
    if ( my $error = $@ ) {
        return $fail->( "Parser Error detected: $error\n" );
    }

    return $fail->( "no document was parsed\n" )
        unless defined $tdoc;

    my $de = $tdoc->getDocumentElement();
    return $fail->( "no documentElement found in the document\n" )
        unless defined $de;

    my $attrstring = $de->getAttribute( "foo" );
    return $fail->( "no attribute was found\n" )
        unless defined $attrstring;

    # Report the value that was actually stored and compared ($in2).
    return $fail->(
        "attrstring equals not the original string ('$attrstring' != '$in2')\n"
    ) unless $attrstring eq $in2;

    unlink TESTFILE;
    return 1;
}

ok( Test_String_Out_File_In_Latin1_Encoding(),
    "toString() WF test on a iso-8859-1 encoded document (reads from a file)" );


# Round-trip test for toFH(): the document (encoding set to 'utf8') is written
# straight to a file handle opened on TESTFILE and then read back with
# parse_file().
#
# Returns 1 when attribute "foo" of the re-parsed document equals the value
# that was stored.  On any failure the scratch file is removed, a warning
# describing the failed step is emitted, and 0 is returned.
sub Test_FH_Out_File_In_UTF8_Encoding {
    my $in = "\x{C3}\x{84}"; # this might be corrupting

    # unless we encode the string properly, the test will fail,
    # because perl thinks it's not UTF8
    my $in2 = encodeToUTF8("utf8", $in);

    # Single failure path: remove the scratch file, say why, fail the test.
    my $fail = sub {
        my ($message) = @_;
        unlink TESTFILE;
        warn $message;
        return 0;
    };

    my $doc  = XML::LibXML::Document->new();
    my $node = XML::LibXML::Element->new('test');

    $node->setAttribute( "foo", $in2 );
    $doc->setDocumentElement($node);

    # explicitly set encoding
    $doc->setEncoding('utf8');

    # Three-argument open on a lexical handle, with the result checked, so a
    # missing or unwritable file is reported here instead of surfacing later
    # as a confusing parser error.
    open( my $fh, '>', TESTFILE )
        or return $fail->( "cannot open " . TESTFILE . " for writing: $!\n" );

    $doc->toFH( $fh, 0 );

    # Buffered write errors surface at close, so its result is checked.
    close($fh)
        or return $fail->( "cannot write " . TESTFILE . ": $!\n" );

    # ok now the testing
    my $parser = XML::LibXML->new();
    my $tdoc   = eval { $parser->parse_file( TESTFILE ) };
    if ( my $error = $@ ) {
        return $fail->( "Parser Error detected:\n $error\n" );
    }

    return $fail->( "no document was parsed\n" )
        unless defined $tdoc;

    my $de = $tdoc->getDocumentElement();
    return $fail->( "no documentElement found in the document\n" )
        unless defined $de;

    my $attrstring = $de->getAttribute( "foo" );
    return $fail->( "no attribute was found\n" )
        unless defined $attrstring;

    # Report the value that was actually stored and compared ($in2).
    return $fail->(
        "attrstring equals not the original string ('$attrstring' != '$in2')\n"
    ) unless $attrstring eq $in2;

    unlink TESTFILE;
    return 1;
}

ok( Test_FH_Out_File_In_UTF8_Encoding(),
    "toFH() WF test on a UTF8 encoded document (reads from a file)" );


# Round-trip test for toFH(): the document (encoding set to 'iso-8859-1') is
# written straight to a file handle opened on TESTFILE and then read back
# with parse_file().
#
# Returns 1 when attribute "foo" of the re-parsed document equals the value
# that was stored.  On any failure the scratch file is removed, a warning
# describing the failed step is emitted, and 0 is returned.
sub Test_FH_Out_File_In_Latin1_Encoding {
    my $in = "\x{C3}\x{84}"; # this might be corrupting

    # unless we encode the string properly, the test will fail,
    # because perl thinks it's not UTF8
    my $in2 = encodeToUTF8("utf8", $in);

    # Single failure path: remove the scratch file, say why, fail the test.
    my $fail = sub {
        my ($message) = @_;
        unlink TESTFILE;
        warn $message;
        return 0;
    };

    my $doc  = XML::LibXML::Document->new();
    my $node = XML::LibXML::Element->new('test');

    $node->setAttribute( "foo", $in2 );
    $doc->setDocumentElement($node);

    # explicitly set encoding
    $doc->setEncoding('iso-8859-1');

    # Three-argument open on a lexical handle, with the result checked, so a
    # missing or unwritable file is reported here instead of surfacing later
    # as a confusing parser error.
    open( my $fh, '>', TESTFILE )
        or return $fail->( "cannot open " . TESTFILE . " for writing: $!\n" );

    $doc->toFH( $fh, 0 );

    # Buffered write errors surface at close, so its result is checked.
    close($fh)
        or return $fail->( "cannot write " . TESTFILE . ": $!\n" );

    # ok now the testing
    my $parser = XML::LibXML->new();
    my $tdoc   = eval { $parser->parse_file( TESTFILE ) };
    if ( my $error = $@ ) {
        return $fail->( "Parser Error detected:\n $error\n" );
    }

    return $fail->( "no document was parsed\n" )
        unless defined $tdoc;

    my $de = $tdoc->getDocumentElement();
    return $fail->( "no documentElement found in the document\n" )
        unless defined $de;

    my $attrstring = $de->getAttribute( "foo" );
    return $fail->( "no attribute was found\n" )
        unless defined $attrstring;

    # Report the value that was actually stored and compared ($in2).
    return $fail->(
        "attrstring equals not the original string ('$attrstring' != '$in2')\n"
    ) unless $attrstring eq $in2;

    unlink TESTFILE;
    return 1;
}

ok( Test_FH_Out_File_In_Latin1_Encoding(),
    "toFH() WF test on a Latin1 encoded document (reads from a file)" );


# Round-trip test for toFile(): the document (encoding set to 'utf8') is
# written to TESTFILE by XML::LibXML itself and then read back with
# parse_file().
#
# Returns 1 when attribute "foo" of the re-parsed document equals the value
# that was stored.  On any failure the scratch file is removed, a warning
# describing the failed step is emitted, and 0 is returned.
sub Test_File_Out_File_In_UTF8_Encoding {
    my $in = "\x{C3}\x{84}"; # this might be corrupting

    # unless we encode the string properly, the test will fail,
    # because perl thinks it's not UTF8
    my $in2 = encodeToUTF8("utf8", $in);

    # Single failure path: remove the scratch file, say why, fail the test.
    my $fail = sub {
        my ($message) = @_;
        unlink TESTFILE;
        warn $message;
        return 0;
    };

    my $doc  = XML::LibXML::Document->new();
    my $node = XML::LibXML::Element->new('test');

    $node->setAttribute( "foo", $in2 );
    $doc->setDocumentElement($node);

    # explicitly set encoding
    $doc->setEncoding('utf8');

    # A false result from toFile() is reported as a write failure rather than
    # being left to show up as a parser error on a missing or empty file.
    # NOTE(review): relies on toFile() returning a true state on success --
    # confirm against the installed XML::LibXML.
    $doc->toFile( TESTFILE, 0 )
        or return $fail->( "cannot write " . TESTFILE . "\n" );

    # ok now the testing
    my $parser = XML::LibXML->new();
    my $tdoc   = eval { $parser->parse_file( TESTFILE ) };
    if ( my $error = $@ ) {
        return $fail->( "Parser Error detected:\n $error\n" );
    }

    return $fail->( "no document was parsed\n" )
        unless defined $tdoc;

    my $de = $tdoc->getDocumentElement();
    return $fail->( "no documentElement found in the document\n" )
        unless defined $de;

    my $attrstring = $de->getAttribute( "foo" );
    return $fail->( "no attribute was found\n" )
        unless defined $attrstring;

    # Report the value that was actually stored and compared ($in2).
    return $fail->(
        "attrstring equals not the original string ('$attrstring' != '$in2')\n"
    ) unless $attrstring eq $in2;

    unlink TESTFILE;
    return 1;
}

ok( Test_File_Out_File_In_UTF8_Encoding(),
    "toFile() WF test on a UTF8 encoded document (reads from a file)" );


# Round-trip test for toFile(): the document (encoding set to 'iso-8859-1')
# is written to TESTFILE by XML::LibXML itself and then read back with
# parse_file().
#
# Returns 1 when attribute "foo" of the re-parsed document equals the value
# that was stored.  On any failure the scratch file is removed, a warning
# describing the failed step is emitted, and 0 is returned.
sub Test_File_Out_File_In_Latin1_Encoding {
    my $in = "\x{C3}\x{84}"; # this might be corrupting

    # unless we encode the string properly, the test will fail,
    # because perl thinks it's not UTF8
    my $in2 = encodeToUTF8("utf8", $in);

    # Single failure path: remove the scratch file, say why, fail the test.
    my $fail = sub {
        my ($message) = @_;
        unlink TESTFILE;
        warn $message;
        return 0;
    };

    my $doc  = XML::LibXML::Document->new();
    my $node = XML::LibXML::Element->new('test');

    $node->setAttribute( "foo", $in2 );
    $doc->setDocumentElement($node);

    # explicitly set encoding
    $doc->setEncoding('iso-8859-1');

    # A false result from toFile() is reported as a write failure rather than
    # being left to show up as a parser error on a missing or empty file.
    # NOTE(review): relies on toFile() returning a true state on success --
    # confirm against the installed XML::LibXML.
    $doc->toFile( TESTFILE, 0 )
        or return $fail->( "cannot write " . TESTFILE . "\n" );

    # ok now the testing
    my $parser = XML::LibXML->new();
    my $tdoc   = eval { $parser->parse_file( TESTFILE ) };
    if ( my $error = $@ ) {
        return $fail->( "Parser Error detected:\n $error\n" );
    }

    return $fail->( "no document was parsed\n" )
        unless defined $tdoc;

    my $de = $tdoc->getDocumentElement();
    return $fail->( "no documentElement found in the document\n" )
        unless defined $de;

    my $attrstring = $de->getAttribute( "foo" );
    return $fail->( "no attribute was found\n" )
        unless defined $attrstring;

    # Report the value that was actually stored and compared ($in2).
    return $fail->(
        "attrstring equals not the original string ('$attrstring' != '$in2')\n"
    ) unless $attrstring eq $in2;

    unlink TESTFILE;
    return 1;
}

ok( Test_File_Out_File_In_Latin1_Encoding(),
    "toFile() WF test on a Latin1 encoded document (reads from a file)" );


