# (c)Peter Gervai <grin@grin.hu>, 2005
# $Id: UniString.pm 694 2005-07-11 15:25:47Z grin $
# Released under GPL v2
#
# universal string object
#
# handles utf8 / nonutf input, and output in any encoding

package UniString;

use strict;
use Encode;

=head1 NAME

UniString - handle any string encoding

=head1 SYNOPSIS

This module accepts strings in any encoding, converts them into
perl's internal utf8 character representation, and emits the string
in any encoding the user needs.

=head1 PUBLIC INTERFACE

=over 4

=cut

=item new()

Create a UniString object

  use UniString;

  my $U = UniString->new( [default input/output encoding] );

=cut

# Constructor.
#   $Type    - class name to bless the new object into
#   $default - optional default input/output encoding; 'iso-8859-2' is
#              used when it is not defined
# Returns the new object: a blessed hash holding Debug, String and
# DefaultEncoding.
sub new {
    my ( $Type, $default ) = @_;

    my $Self = bless {
        Debug           => 0,        # debug printing is off on creation
        String          => undef,    # nothing stored until set() is called
        DefaultEncoding => defined $default ? $default : 'iso-8859-2',
    }, $Type;

    print "UniString::new created, default $Self->{DefaultEncoding}.\n" if $Self->{Debug};
    return $Self;
}


=item set()

Set the string

  $U->set( $s, [encoding] );

Encoding only required for non perl-utf8 strings (example: "iso-8859-2")

=cut

# Store a string in the object as perl characters.
#   $str - the string to store
#   $enc - optional encoding of $str; only consulted when $str does not
#          carry perl's utf8 flag
# A string that Encode::is_utf8() reports as utf8 is stored unchanged.
# Anything else is decoded from $enc using Encode::FB_PERLQQ as the CHECK
# value. Encode::decode() croaks on an unknown encoding name.
# Returns the object itself, so calls can be chained.
sub set {
    my ( $Self, $str, $enc ) = @_;

    if( Encode::is_utf8( $str ) ) {
        print "UniString::set got utf8\n" if $Self->{Debug};
        $Self->{String} = $str;

    } else {
        # A missing *or empty* encoding falls back to the object's default.
        # This is the same rule get() and check_get() apply; previously an
        # empty string was passed straight to Encode and made it croak.
        $enc ||= $Self->{DefaultEncoding};

        print "UniString::set got no utf8, enc $enc.\n" if $Self->{Debug};
        $Self->{String} = Encode::decode( $enc, $str, Encode::FB_PERLQQ );
    }

    print "UniString::set did '$Self->{String}'\n" if $Self->{Debug};
    return $Self;
}


=item get()

Get the string using an encoding

  $res = $U->get( [encoding] );

=cut

# Return the stored string encoded as bytes.
#   $enc - optional target encoding; the object's DefaultEncoding is used
#          when it is missing or false
# Encoding is done with Encode::FB_PERLQQ as the CHECK value.
sub get {
    my ( $Self, $enc ) = @_;
    $enc ||= $Self->{DefaultEncoding};

    print "UniString::get src '$Self->{String}'\n" if $Self->{Debug};

    # Hand Encode a copy rather than the stored value: its documentation
    # warns that the input may be modified in place depending on CHECK.
    my $chars = $Self->{String};
    my $bytes = Encode::encode( $enc, $chars, Encode::FB_PERLQQ );

    print "UniString::get results '$bytes'\n" if $Self->{Debug};

    return $bytes;
}


=item check_get()

Check whether the string can be represented in the given encoding

  $res = $U->check_get( [encoding] );

Returns true if ok.

=cut

# Test whether the stored string can be encoded without loss.
#   $enc - optional target encoding; the object's DefaultEncoding is used
#          when it is missing or false
# Runs Encode::encode() with Encode::FB_CROAK inside an eval.
# Returns 1 when the encode call completes, 0 when it dies.
sub check_get {
    my ( $Self, $enc ) = @_;
    $enc ||= $Self->{DefaultEncoding};

    # Work on a copy: Encode's documentation warns that the input may be
    # modified in place depending on CHECK.
    my $tmp = $Self->{String};

    # Success is taken from the eval's own return value, and $@ is saved
    # right away. The debug print below could otherwise reset $@ (for
    # example through a tied STDOUT whose PRINT runs an eval) and make a
    # failed encode look like a success.
    my $ok  = eval { Encode::encode( $enc, $tmp, Encode::FB_CROAK ); 1 };
    my $err = $@;

    print "ERR?:$err:\n" if $Self->{Debug};

    return $ok ? 1 : 0;
}

1;

=head1 EXAMPLE

 use UniString;

 my $s = UniString->new('iso-8859-2');
 $s->set( 'két árva őrült írót nyúz' );

 if( $s->check_get( 'utf-8' ) ) {
    my $r = $s->get( 'utf-8' ) ;
    print "Result: '$r'\n";
    
 } else {
    print "Cannot print this like that.\n";
 }

=head1 LICENCE

This software comes with ABSOLUTELY NO WARRANTY. 
It is licensed under the terms of GNU GPL version 2.

=head1 VERSION

$Id: UniString.pm 694 2005-07-11 15:25:47Z grin $

=cut
