# New Ticket Created by [EMAIL PROTECTED]
# Please include the string: [perl #36569]
# in the subject line of all future correspondence about this issue.
# <URL: https://rt.perl.org/rt3/Ticket/Display.html?id=36569 >
This is a bug report for perl from [EMAIL PROTECTED],
generated with the help of perlbug 1.35 running under perl v5.8.4.
I ran into this, and wondered if it is a bug.
I have tested on perl 5.8.4 with Encode.pm version 1.99_01 (from
Debian package) and 2.10 (from CPAN).
Basically, if I take a string with a trailing nul, encode it (to any
encoding, even "ascii"), decode it, then chop it, chop returns undef
and the string still has the trailing nul. If the string instead has
a trailing newline (for example), the chop works correctly.
Am I missing something?
Here is sample output from my test code below:
--
@asc (en/de-coded) before chop
$VAR1 = "hello, world!\n";
$VAR2 = "goodbye, cruel world!\0";
@asc2 (untouched) before chop
$VAR1 = "hello, world!\n";
$VAR2 = "goodbye, cruel world!\0";
@asc (en/de-coded) after chop
$VAR1 = "hello, world!";
$VAR2 = "goodbye, cruel world!\0";
@asc2 (untouched) after chop
$VAR1 = "hello, world!";
$VAR2 = "goodbye, cruel world!";
--
And here is my code:
--
#!/usr/bin/perl -w
# Reproducer for [perl #36569]: after a string with a trailing NUL is passed
# through Encode::encode and Encode::decode, chop is reported to leave the
# NUL in place, while an untouched copy of the same string is chopped
# correctly. A string ending in a newline is included as a control case.
use strict;
use Encode;
use Data::Dumper;
# Dump strings double-quoted with escapes so trailing "\n" / "\0" are visible.
$Data::Dumper::Useqq = 1;
my @asc = ("hello, world!\n", "goodbye, cruel world!\0");
my @asc2 = @asc; # copy of untouched strings
# Round-trip both strings through UTF-16LE; @asc then holds the decoded
# results while @asc2 keeps the original, never-encoded strings.
my @utf = (encode('UTF-16LE', $asc[0]),
encode('UTF-16LE', $asc[1]));
@asc = (decode('UTF-16LE', $utf[0]),
decode('UTF-16LE', $utf[1]));
print "\n\n";
# The "@" in each label is escaped so the array name is printed literally
# rather than being interpolated into the string.
print "\@asc (en/de-coded) before chop\n", Dumper(@asc), "\n";
print "\@asc2 (untouched) before chop\n", Dumper(@asc2), "\n";
# chop on an array removes the last character of every element.
chop @asc;
chop @asc2;
# Expected: both arrays lose their final character. Reported: the decoded
# string in @asc still ends in "\0" after the chop.
print "\@asc (en/de-coded) after chop\n", Dumper(@asc), "\n";
print "\@asc2 (untouched) after chop\n", Dumper(@asc2), "\n";
print "\n\n";
--
--
--------------------------------------------------------------------------
Jonathan Hankins Homewood City Schools
[EMAIL PROTECTED]
--------------------------------------------------------------------------
> I ran into this, and wondered if it is a bug.
Looks like a bug to me. At first glance, I'd describe it as a case where
chop is incapable of removing a null byte from the end of a utf8 string.
Here is another demonstration:
#!/usr/bin/perl
# Second demonstration for [perl #36569]: repeatedly chop $_ while it still
# ends in a NUL, first on a plain string literal and then on the same text
# returned by Encode::decode.
use Encode;
# Control case: a plain string literal with a trailing NUL.
$_ = "foo\0";
# Loop for as long as $_ still ends in a NUL; each pass chops one character.
while ( /\x00$/ ) {
printf "chopping from %d bytes\n", length();
chop;
# presumably paces the output so a loop that never ends is easy to observe
sleep 1;
}
printf "okay: %d bytes left\n", length();
# Failing case: the same text, but obtained from decode().
# NOTE(review): the report attributes the failure to the decoded string being
# a "utf8 string" -- confirm against the perl version under test.
$_ = decode( 'ascii', "foo\0" );
# On the perls named in the report this loop never finishes, because chop
# does not remove the trailing NUL from the decoded string.
while ( /\x00$/ ) {
printf "chopping from %d utf8 chars\n", length();
chop;
sleep 1;
}
printf "okay: %d chars left\n", length();
__END__
For me (macosx 10.3.9/darwin 7.9/perl 5.8.1, and freebsd 5.4/perl 5.8.6),
the second while loop never finishes -- chop never removes the final null.
-----------
David Graff Linguistic Data Consortium
[EMAIL PROTECTED] 3600 Market St., Suite 810
voice: (215) 898-0887 University of Pennsylvania
fax: (215) 573-2175 Philadelphia, PA 19104
http://www.ldc.upenn.edu
--=-=-=
---
Flags:
category=core
severity=low
---
Site configuration information for perl v5.8.4:
Configured by Debian Project at Tue Mar 8 20:31:23 EST 2005.
Summary of my perl5 (revision 5 version 8 subversion 4) configuration:
Platform:
osname=linux, osvers=2.4.27-ti1211, archname=i386-linux-thread-multi
uname='linux kosh 2.4.27-ti1211 #1 sun sep 19 18:17:45 est 2004 i686
gnulinux '
config_args='-Dusethreads -Duselargefiles -Dccflags=-DDEBIAN
-Dcccdlflags=-fPIC -Darchname=i386-linux -Dprefix=/usr
-Dprivlib=/usr/share/perl/5.8 -Darchlib=/usr/lib/perl/5.8 -Dvendorprefix=/usr
-Dvendorlib=/usr/share/perl5 -Dvendorarch=/usr/lib/perl5
-Dsiteprefix=/usr/local -Dsitelib=/usr/local/share/perl/5.8.4
-Dsitearch=/usr/local/lib/perl/5.8.4 -Dman1dir=/usr/share/man/man1
-Dman3dir=/usr/share/man/man3 -Dsiteman1dir=/usr/local/man/man1
-Dsiteman3dir=/usr/local/man/man3 -Dman1ext=1 -Dman3ext=3perl
-Dpager=/usr/bin/sensible-pager -Uafs -Ud_csh -Uusesfio -Uusenm -Duseshrplib
-Dlibperl=libperl.so.5.8.4 -Dd_dosuid -des'
hint=recommended, useposix=true, d_sigaction=define
usethreads=define use5005threads=undef useithreads=define
usemultiplicity=define
useperlio=define d_sfio=undef uselargefiles=define usesocks=undef
use64bitint=undef use64bitall=undef uselongdouble=undef
usemymalloc=n, bincompat5005=undef
Compiler:
cc='cc', ccflags ='-D_REENTRANT -D_GNU_SOURCE -DTHREADS_HAVE_PIDS -DDEBIAN
-fno-strict-aliasing -I/usr/local/include -D_LARGEFILE_SOURCE
-D_FILE_OFFSET_BITS=64',
optimize='-O2',
cppflags='-D_REENTRANT -D_GNU_SOURCE -DTHREADS_HAVE_PIDS -DDEBIAN
-fno-strict-aliasing -I/usr/local/include'
ccversion='', gccversion='3.3.5 (Debian 1:3.3.5-9)', gccosandvers=''
intsize=4, longsize=4, ptrsize=4, doublesize=8, byteorder=1234
d_longlong=define, longlongsize=8, d_longdbl=define, longdblsize=12
ivtype='long', ivsize=4, nvtype='double', nvsize=8, Off_t='off_t',
lseeksize=8
alignbytes=4, prototype=define
Linker and Libraries:
ld='cc', ldflags =' -L/usr/local/lib'
libpth=/usr/local/lib /lib /usr/lib
libs=-lgdbm -lgdbm_compat -ldb -ldl -lm -lpthread -lc -lcrypt
perllibs=-ldl -lm -lpthread -lc -lcrypt
libc=/lib/libc-2.3.2.so, so=so, useshrplib=true, libperl=libperl.so.5.8.4
gnulibc_version='2.3.2'
Dynamic Linking:
dlsrc=dl_dlopen.xs, dlext=so, d_dlsymun=undef, ccdlflags='-Wl,-E'
cccdlflags='-fPIC', lddlflags='-shared -L/usr/local/lib'
Locally applied patches:
---
@INC for perl v5.8.4:
/etc/perl
/usr/local/lib/perl/5.8.4
/usr/local/share/perl/5.8.4
/usr/lib/perl5
/usr/share/perl5
/usr/lib/perl/5.8
/usr/share/perl/5.8
/usr/local/lib/site_perl
.
---
Environment for perl v5.8.4:
HOME=/home/jhankins
LANG=en_US
LANGUAGE (unset)
LD_LIBRARY_PATH (unset)
LOGDIR (unset)
PATH=/usr/local/bin:/bin:/usr/bin:/usr/X11R6/bin:/home/jhankins/bin:/usr/bin/mh
PERL_BADLANG (unset)
SHELL=/usr/bin/zsh