Nicholas Clark writes:
On Sun, Sep 07, 2003 at 02:50:35PM +0000, Peter Gibbs wrote:
# New Ticket Created by Peter Gibbs
# Please include the string: [perl #23752]
# in the subject line of all future correspondence about this issue.
# URL: http://rt.perl.org/rt2/Ticket/Display.html?id=23752
The 'index' opcode searches for a substring in another string. It seems
reasonable to expect that equal strings would match at offset zero. But the
program:
set S0, "\xAB"
find_chartype I0, "8859-1"
set_chartype S0, I0
find_encoding I0, "singlebyte"
set_encoding S0, I0
find_encoding I0, "utf8"
find_chartype I1, "unicode"
transcode S1, S0, I0, I1
eq S0, S1, equal
print "not "
equal:
print "equal\n"
index I0, S0, S1
print I0
print "\n"
end
yields the output:
equal
-1
which doesn't seem quite right.
Looks like a bug to me, but I'm no expert on where to start searching for
it.
Here's a fix.
Luke
Index: string.c
===
RCS file: /cvs/public/parrot/string.c,v
retrieving revision 1.145
diff -u -r1.145 string.c
--- string.c 7 Sep 2003 12:45:48 -0000 1.145
+++ string.c 21 Sep 2003 22:23:30 -0000
@@ -327,17 +327,17 @@
string_str_index_multibyte(struct Parrot_Interp *interpreter,
const STRING *str, const STRING *find, UINTVAL start)
{
-const void* const lastmatch =
-str->encoding->skip_backward((char*)str->strstart + str->strlen,
-find->encoding->characters(find, find->strlen));
-const void* const lastfind = (char*)find->strstart + find->strlen;
+const void* const lastmatch = str->encoding->skip_backward(
+(char*)str->strstart + str->buflen, find->strlen);
+const void* const lastfind = find->encoding->skip_forward(
+find->strstart, find->strlen);
const void* sp;
const void* fp;
const void* ip;
INTVAL pos = start;
sp = str->encoding->skip_forward(str->strstart, start);
-while (sp < lastmatch) {
+while (sp <= lastmatch) {
fp = find->strstart;
ip = sp;
Index: t/op/string.t
===
RCS file: /cvs/public/parrot/t/op/string.t,v
retrieving revision 1.54
diff -u -r1.54 string.t
--- t/op/string.t 20 Aug 2003 09:43:16 -0000 1.54
+++ t/op/string.t 21 Sep 2003 22:23:30 -0000
@@ -1,6 +1,6 @@
#! perl -w
-use Parrot::Test tests => 117;
+use Parrot::Test tests => 119;
use Test::More;
output_is( <<'CODE', <<OUTPUT, "set_s_s|sc" );
@@ -1294,6 +1294,57 @@
CODE
46368
-1
+OUTPUT
+
+output_is(<<'CODE',<<OUTPUT,"index, multibyte matching");
+set S0, "\xAB"
+find_chartype I0, "usascii"
+set_chartype S0, I0
+find_encoding I0, "singlebyte"
+set_encoding S0, I0
+
+find_encoding I0, "utf8"
+find_chartype I1, "unicode"
+transcode S1, S0, I0, I1
+
+eq S0, S1, equal
+print "not "
+equal:
+print "equal\n"
+
+index I0, S0, S1
+print I0
+print "\n"
+index I0, S1, S0
+print I0
+print "\n"
+CODE
+equal
+0
+0
+OUTPUT
+
+output_is(<<'CODE',<<OUTPUT,"index, multibyte matching 2");
+set S0, "\xAB\xBA"
+set S1, "foo\xAB\xAB\xBAbar"
+find_chartype I0, "usascii"
+set_chartype S0, I0
+find_encoding I0, "singlebyte"
+set_encoding S0, I0
+
+find_chartype I0, "unicode"
+find_encoding I1, "utf8"
+transcode S1, S1, I1, I0
+
+index I0, S0, S1
+print I0
+print "\n"
+index I0, S1, S0
+print I0
+print "\n"
+CODE
+-1
+4
OUTPUT
output_is(<<'CODE',<<OUTPUT,"num to string");