# New Ticket Created by  [EMAIL PROTECTED] 
# Please include the string:  [perl #36569]
# in the subject line of all future correspondence about this issue. 
# <URL: https://rt.perl.org/rt3/Ticket/Display.html?id=36569 >



This is a bug report for perl from [EMAIL PROTECTED],
generated with the help of perlbug 1.35 running under perl v5.8.4.

I ran into this, and wondered if it is a bug.

I have tested on perl 5.8.4 with Encode.pm version 1.99_01 (from
Debian package) and 2.10 (from CPAN).

Basically, if I take a string with a trailing nul, encode it (to any
encoding, even "ascii"), decode it, then chop it, chop returns undef
and the string still has the trailing nul.  If the string instead has
a trailing newline (for example), the chop works correctly.

Am I missing something?

Here is sample output from my test code below:

--
@asc (en/de-coded) before chop
$VAR1 = "hello, world!\n";
$VAR2 = "goodbye, cruel world!\0";

@asc2 (untouched) before chop
$VAR1 = "hello, world!\n";
$VAR2 = "goodbye, cruel world!\0";

@asc (en/de-coded) after chop
$VAR1 = "hello, world!";
$VAR2 = "goodbye, cruel world!\0";

@asc2 (untouched) after chop
$VAR1 = "hello, world!";
$VAR2 = "goodbye, cruel world!";
--

And here is my code:

--
#!/usr/bin/perl -w

# Reproduction for [perl #36569]: after an Encode encode/decode round trip,
# chop() removes a trailing newline but leaves a trailing NUL in place.
# An untouched copy of the same strings is chopped alongside for comparison.

use strict;
use Encode;
use Data::Dumper;

# Dump strings in double-quoted form so "\n" and "\0" show up as escapes.
$Data::Dumper::Useqq = 1;

my @asc = ("hello, world!\n", "goodbye, cruel world!\0");
my @asc2 = @asc;    # copy of untouched strings

# Round-trip each string through UTF-16LE; the text is unchanged, but @asc
# now holds the strings returned by decode() rather than the literals.
my @utf = (encode('UTF-16LE', $asc[0]),
           encode('UTF-16LE', $asc[1]));

@asc = (decode('UTF-16LE', $utf[0]),
        decode('UTF-16LE', $utf[1]));

print "\n\n";
# The "@" in each label is escaped so the array name is printed literally
# instead of being interpolated.
print "\@asc (en/de-coded) before chop\n", Dumper(@asc), "\n";
print "\@asc2 (untouched) before chop\n", Dumper(@asc2), "\n";
# chop on an array chops every element; both arrays should end up identical.
chop @asc;
chop @asc2;
print "\@asc (en/de-coded) after chop\n", Dumper(@asc), "\n";
print "\@asc2 (untouched) after chop\n", Dumper(@asc2), "\n";
print "\n\n";
--

-- 
--------------------------------------------------------------------------
Jonathan Hankins        Homewood City Schools

[EMAIL PROTECTED]
--------------------------------------------------------------------------

> I ran into this, and wondered if it is a bug. 

Looks like a bug to me.  At first glance, I'd describe it as a case where 
chop is incapable of removing a null byte from the end of a utf8 string.

Here is another demonstration:


#!/usr/bin/perl

# Second demonstration for [perl #36569]: repeatedly chop $_ while it still
# ends in a NUL, first for a plain string literal and then for the same text
# obtained from Encode::decode().

use Encode;

# Case 1: plain string literal ending in NUL.
$_ = "foo\0";
while ( /\x00$/ ) {                 # loop for as long as $_ ends in NUL
    printf "chopping from %d bytes\n", length();
    chop;                           # operates on $_
    sleep 1;                        # presumably throttles output in case the loop never ends
}
printf "okay: %d bytes left\n", length();

# Case 2: the same text, but returned by decode(). The report states that
# this is the loop that never terminates on the affected perls, because
# chop never removes the final NUL.
$_ = decode( 'ascii', "foo\0" );
while ( /\x00$/ ) {
    printf "chopping from %d utf8 chars\n", length();
    chop;
    sleep 1;
}
printf "okay: %d chars left\n", length();

__END__


For me (macosx 10.3.9/darwin 7.9/perl 5.8.1, and freebsd 5.4/perl 5.8.6),
the second while loop never finishes -- chop never removes the final null.


-----------
David Graff                     Linguistic Data Consortium
[EMAIL PROTECTED]               3600 Market St., Suite 810
voice: (215) 898-0887           University of Pennsylvania
fax:   (215) 573-2175           Philadelphia, PA 19104
                http://www.ldc.upenn.edu


--=-=-=
---
Flags:
    category=core
    severity=low
---
Site configuration information for perl v5.8.4:

Configured by Debian Project at Tue Mar  8 20:31:23 EST 2005.

Summary of my perl5 (revision 5 version 8 subversion 4) configuration:
  Platform:
    osname=linux, osvers=2.4.27-ti1211, archname=i386-linux-thread-multi
    uname='linux kosh 2.4.27-ti1211 #1 sun sep 19 18:17:45 est 2004 i686 
gnulinux '
    config_args='-Dusethreads -Duselargefiles -Dccflags=-DDEBIAN 
-Dcccdlflags=-fPIC -Darchname=i386-linux -Dprefix=/usr 
-Dprivlib=/usr/share/perl/5.8 -Darchlib=/usr/lib/perl/5.8 -Dvendorprefix=/usr 
-Dvendorlib=/usr/share/perl5 -Dvendorarch=/usr/lib/perl5 
-Dsiteprefix=/usr/local -Dsitelib=/usr/local/share/perl/5.8.4 
-Dsitearch=/usr/local/lib/perl/5.8.4 -Dman1dir=/usr/share/man/man1 
-Dman3dir=/usr/share/man/man3 -Dsiteman1dir=/usr/local/man/man1 
-Dsiteman3dir=/usr/local/man/man3 -Dman1ext=1 -Dman3ext=3perl 
-Dpager=/usr/bin/sensible-pager -Uafs -Ud_csh -Uusesfio -Uusenm -Duseshrplib 
-Dlibperl=libperl.so.5.8.4 -Dd_dosuid -des'
    hint=recommended, useposix=true, d_sigaction=define
    usethreads=define use5005threads=undef useithreads=define 
usemultiplicity=define
    useperlio=define d_sfio=undef uselargefiles=define usesocks=undef
    use64bitint=undef use64bitall=undef uselongdouble=undef
    usemymalloc=n, bincompat5005=undef
  Compiler:
    cc='cc', ccflags ='-D_REENTRANT -D_GNU_SOURCE -DTHREADS_HAVE_PIDS -DDEBIAN 
-fno-strict-aliasing -I/usr/local/include -D_LARGEFILE_SOURCE 
-D_FILE_OFFSET_BITS=64',
    optimize='-O2',
    cppflags='-D_REENTRANT -D_GNU_SOURCE -DTHREADS_HAVE_PIDS -DDEBIAN 
-fno-strict-aliasing -I/usr/local/include'
    ccversion='', gccversion='3.3.5 (Debian 1:3.3.5-9)', gccosandvers=''
    intsize=4, longsize=4, ptrsize=4, doublesize=8, byteorder=1234
    d_longlong=define, longlongsize=8, d_longdbl=define, longdblsize=12
    ivtype='long', ivsize=4, nvtype='double', nvsize=8, Off_t='off_t', 
lseeksize=8
    alignbytes=4, prototype=define
  Linker and Libraries:
    ld='cc', ldflags =' -L/usr/local/lib'
    libpth=/usr/local/lib /lib /usr/lib
    libs=-lgdbm -lgdbm_compat -ldb -ldl -lm -lpthread -lc -lcrypt
    perllibs=-ldl -lm -lpthread -lc -lcrypt
    libc=/lib/libc-2.3.2.so, so=so, useshrplib=true, libperl=libperl.so.5.8.4
    gnulibc_version='2.3.2'
  Dynamic Linking:
    dlsrc=dl_dlopen.xs, dlext=so, d_dlsymun=undef, ccdlflags='-Wl,-E'
    cccdlflags='-fPIC', lddlflags='-shared -L/usr/local/lib'

Locally applied patches:
    

---
@INC for perl v5.8.4:
    /etc/perl
    /usr/local/lib/perl/5.8.4
    /usr/local/share/perl/5.8.4
    /usr/lib/perl5
    /usr/share/perl5
    /usr/lib/perl/5.8
    /usr/share/perl/5.8
    /usr/local/lib/site_perl
    .

---
Environment for perl v5.8.4:
    HOME=/home/jhankins
    LANG=en_US
    LANGUAGE (unset)
    LD_LIBRARY_PATH (unset)
    LOGDIR (unset)
    PATH=/usr/local/bin:/bin:/usr/bin:/usr/X11R6/bin:/home/jhankins/bin:/usr/bin/mh
    PERL_BADLANG (unset)
    SHELL=/usr/bin/zsh

Reply via email to