Change 30021 by [EMAIL PROTECTED] on 2007/01/26 19:23:54
Integrate:
[ 28490]
Introduce a new function, pv_escape(), to display contents of PVs
that might contain non printable chars.
Subject: Re: [PATCH]: fix: [perl #39583] Pattern Match fails for
specific length string
From: demerphq <[EMAIL PROTECTED]>
Date: Wed, 5 Jul 2006 20:40:58 +0200
Message-ID: <[EMAIL PROTECTED]>
[ 28582]
Subject: Updated escaping code. utf8 regex debug output improvements
From: demerphq <[EMAIL PROTECTED]>
Date: Sat, 15 Jul 2006 18:56:03 +0200
Message-Id: <[EMAIL PROTECTED]>
[ 28584]
run regen_headerds
Affected files ...
... //depot/maint-5.8/perl/dump.c#70 edit
... //depot/maint-5.8/perl/embed.fnc#198 integrate
... //depot/maint-5.8/perl/embed.h#149 integrate
... //depot/maint-5.8/perl/global.sym#58 integrate
... //depot/maint-5.8/perl/perl.h#142 integrate
... //depot/maint-5.8/perl/pod/perlapi.pod#93 integrate
... //depot/maint-5.8/perl/proto.h#189 integrate
... //depot/maint-5.8/perl/reentr.c#19 integrate
Differences ...
==== //depot/maint-5.8/perl/dump.c#70 (text) ====
Index: perl/dump.c
--- perl/dump.c#69~29993~ 2007-01-26 01:15:17.000000000 -0800
+++ perl/dump.c 2007-01-26 11:23:54.000000000 -0800
@@ -113,40 +113,208 @@
op_dump(PL_eval_root);
}
-char *
-Perl_pv_display(pTHX_ SV *dsv, char *pv, STRLEN cur, STRLEN len, STRLEN pvlim)
-{
- const bool nul_terminated = len > cur && pv[cur] == '\0';
- bool truncated = 0;
- sv_setpvn(dsv, "\"", 1);
- for (; cur--; pv++) {
- if (pvlim && SvCUR(dsv) >= pvlim) {
- truncated = 1;
- break;
- }
- switch (*pv) {
- case '\t': sv_catpvs(dsv, "\\t"); break;
- case '\n': sv_catpvs(dsv, "\\n"); break;
- case '\r': sv_catpvs(dsv, "\\r"); break;
- case '\f': sv_catpvs(dsv, "\\f"); break;
- case '"': sv_catpvs(dsv, "\\\""); break;
- case '\\': sv_catpvs(dsv, "\\\\"); break;
- default:
- if (isPRINT(*pv))
- sv_catpvn(dsv, pv, 1);
- else if (cur && isDIGIT(*(pv+1)))
- Perl_sv_catpvf(aTHX_ dsv, "\\%03o", (U8)*pv);
- else
- Perl_sv_catpvf(aTHX_ dsv, "\\%o", (U8)*pv);
- }
+/*
+=for apidoc Apd|char*|pv_escape|NN SV *dsv|NN const U8 const *str\
+ |const STRLEN count|const STRLEN max
+ |STRLEN const *escaped, const U32 flags
+
+Escapes at most the first "count" chars of pv and puts the results into
+dsv such that the size of the escaped string will not exceed "max" chars
+and will not contain any incomplete escape sequences.
+
+If flags contains PERL_PV_ESCAPE_QUOTE then any double quotes in the string
+will also be escaped.
+
+Normally the SV will be cleared before the escaped string is prepared,
+but when PERL_PV_ESCAPE_NOCLEAR is set this will not occur.
+
+If PERL_PV_ESCAPE_UNI is set then the input string is treated as unicode,
+if PERL_PV_ESCAPE_UNI_DETECT is set then the input string is scanned
+using C<is_utf8_string()> to determine if it is unicode.
+
+If PERL_PV_ESCAPE_ALL is set then all input chars will be output
+using C<\x01F1> style escapes, otherwise only chars above 255 will be
+escaped using this style, other non printable chars will use octal or
+common escaped patterns like C<\n>. If PERL_PV_ESCAPE_NOBACKSLASH
+then all chars below 255 will be treated as printable and
+will be output as literals.
+
+If PERL_PV_ESCAPE_FIRSTCHAR is set then only the first char of the
+string will be escaped, regardles of max. If the string is utf8 and
+the chars value is >255 then it will be returned as a plain hex
+sequence. Thus the output will either be a single char,
+an octal escape sequence, a special escape like C<\n> or a 3 or
+more digit hex value.
+
+Returns a pointer to the escaped text as held by dsv.
+
+=cut
+*/
+#define PV_ESCAPE_OCTBUFSIZE 32
+char *
+Perl_pv_escape( pTHX_ SV *dsv, U8 const * const str,
+ const STRLEN count, const STRLEN max,
+ STRLEN * const escaped, const U32 flags )
+{
+ U8 dq = (flags & PERL_PV_ESCAPE_QUOTE) ? '"' : '\\';
+ U8 octbuf[PV_ESCAPE_OCTBUFSIZE] = "\\123456789ABCDF";
+ STRLEN wrote = 0; /* chars written so far */
+ STRLEN chsize = 0; /* size of data to be written */
+ STRLEN readsize = 1; /* size of data just read */
+ bool isuni= flags & PERL_PV_ESCAPE_UNI ? 1 : 0; /* is this unicode */
+ const U8 *pv = str;
+ const U8 *end = pv + count; /* end of string */
+
+ if (!flags & PERL_PV_ESCAPE_NOCLEAR)
+ sv_setpvn(dsv, "", 0);
+
+ if ((flags & PERL_PV_ESCAPE_UNI_DETECT) && is_utf8_string((U8 *)pv, count))
+ isuni = 1;
+
+ for ( ; (pv < end && (!max || (wrote < max))) ; pv += readsize ) {
+ const UV u= (isuni) ? utf8_to_uvchr((U8 *)pv, &readsize) : *pv;
+ const U8 c = (U8)u & 0xFF;
+
+ if ( ( u > 255 ) || (flags & PERL_PV_ESCAPE_ALL)) {
+ if (flags & PERL_PV_ESCAPE_FIRSTCHAR)
+ chsize = my_snprintf( octbuf, PV_ESCAPE_OCTBUFSIZE,
+ "%"UVxf, u);
+ else
+ chsize = my_snprintf( octbuf, PV_ESCAPE_OCTBUFSIZE,
+ "\\x{%"UVxf"}", u);
+ } else if (flags & PERL_PV_ESCAPE_NOBACKSLASH) {
+ chsize = 1;
+ } else {
+ if ( (c == dq) || (c == '\\') || !isPRINT(c) ) {
+ chsize = 2;
+ switch (c) {
+ case '\\' : octbuf[1] = '\\'; break;
+ case '\v' : octbuf[1] = 'v'; break;
+ case '\t' : octbuf[1] = 't'; break;
+ case '\r' : octbuf[1] = 'r'; break;
+ case '\n' : octbuf[1] = 'n'; break;
+ case '\f' : octbuf[1] = 'f'; break;
+ case '"' :
+ if ( dq == '"' )
+ octbuf[1] = '"';
+ else
+ chsize = 1;
+ break;
+ default:
+ if ( (pv < end) && isDIGIT(*(pv+readsize)) )
+ chsize = my_snprintf( octbuf,
PV_ESCAPE_OCTBUFSIZE,
+ "\\%03o", c);
+ else
+ chsize = my_snprintf( octbuf,
PV_ESCAPE_OCTBUFSIZE,
+ "\\%o", c);
+ }
+ } else {
+ chsize=1;
+ }
+ }
+ if ( max && (wrote + chsize > max) ) {
+ break;
+ } else if (chsize > 1) {
+ sv_catpvn(dsv, octbuf, chsize);
+ wrote += chsize;
+ } else {
+ Perl_sv_catpvf( aTHX_ dsv, "%c", c);
+ wrote++;
+ }
+ if ( flags & PERL_PV_ESCAPE_FIRSTCHAR )
+ break;
}
- sv_catpvs(dsv, "\"");
- if (truncated)
- sv_catpvs(dsv, "...");
- if (nul_terminated)
- sv_catpvs(dsv, "\\0");
+ if (escaped != NULL)
+ *escaped= pv - str;
+ return SvPVX(dsv);
+}
+/*
+=for apidoc Apd|char *|pv_pretty|NN SV *dsv|NN const U8 const *str\
+ |const STRLEN count|const STRLEN max\
+ |const U8 const *start_color| const U8 const *end_color\
+ |const U32 flags
+
+Converts a string into something presentable, handling escaping via
+pv_escape() and supporting quoting and elipses.
+
+If the PERL_PV_PRETTY_QUOTE flag is set then the result will be
+double quoted with any double quotes in the string escaped. Otherwise
+if the PERL_PV_PRETTY_LTGT flag is set then the result be wrapped in
+angle brackets.
+
+If the PERL_PV_PRETTY_ELIPSES flag is set and not all characters in
+string were output then an elipses C<...> will be appended to the
+string. Note that this happens AFTER it has been quoted.
+
+If start_color is non-null then it will be inserted after the opening
+quote (if there is one) but before the escaped text. If end_color
+is non-null then it will be inserted after the escaped text but before
+any quotes or elipses.
+
+Returns a pointer to the prettified text as held by dsv.
+
+=cut
+*/
+
+char *
+Perl_pv_pretty( pTHX_ SV *dsv, U8 const * const str, const STRLEN count,
+ const STRLEN max, U8 const * const start_color, U8 const * const end_color,
+ const U32 flags )
+{
+ U8 dq = (flags & PERL_PV_PRETTY_QUOTE) ? '"' : '\\';
+ STRLEN escaped;
+
+ if ( dq == '"' )
+ sv_setpvn(dsv, "\"", 1);
+ else if ( flags & PERL_PV_PRETTY_LTGT )
+ sv_setpvn(dsv, "<", 1);
+ else
+ sv_setpvn(dsv, "", 0);
+
+ if ( start_color != NULL )
+ Perl_sv_catpvf( aTHX_ dsv, "%s", start_color);
+
+ pv_escape( dsv, str, count, max, &escaped, flags | PERL_PV_ESCAPE_NOCLEAR
);
+
+ if ( end_color != NULL )
+ Perl_sv_catpvf( aTHX_ dsv, "%s", end_color);
+
+ if ( dq == '"' )
+ sv_catpvn( dsv, "\"", 1 );
+ else if ( flags & PERL_PV_PRETTY_LTGT )
+ sv_catpvn( dsv, ">", 1);
+
+ if ( (flags & PERL_PV_PRETTY_ELIPSES) && ( escaped < count ) )
+ sv_catpvn( dsv, "...", 3 );
+
+ return SvPVX(dsv);
+}
+
+/*
+=for apidoc pv_display
+
+ char *pv_display(SV *dsv, const char *pv, STRLEN cur, STRLEN len,
+ STRLEN pvlim, U32 flags)
+Similar to
+
+ pv_escape(dsv,pv,cur,pvlim,PERL_PV_ESCAPE_QUOTE);
+
+except that an additional "\0" will be appended to the string when
+len > cur and pv[cur] is "\0".
+
+Note that the final string may be up to 7 chars longer than pvlim.
+
+=cut
+*/
+
+char *
+Perl_pv_display(pTHX_ SV *dsv, char *pv, STRLEN cur, STRLEN len, STRLEN pvlim)
+{
+ pv_pretty( dsv, (char *)pv, cur, pvlim, 0, 0, PERL_PV_PRETTY_DUMP);
+ if (len > cur && pv[cur] == '\0')
+ sv_catpvn( dsv, "\\0", 2 );
return SvPVX(dsv);
}
==== //depot/maint-5.8/perl/embed.fnc#198 (text) ====
Index: perl/embed.fnc
--- perl/embed.fnc#197~30007~ 2007-01-26 05:50:55.000000000 -0800
+++ perl/embed.fnc 2007-01-26 11:23:54.000000000 -0800
@@ -967,8 +967,17 @@
Apd |void |sv_setsv_mg |NN SV *dstr|NULLOK SV *sstr
Apdbm |void |sv_usepvn_mg |NN SV *sv|NULLOK char *ptr|STRLEN len
ApR |MGVTBL*|get_vtbl |int vtbl_id
-Ap |char* |pv_display |NN SV *dsv|NN char *pv|STRLEN cur|STRLEN len \
+Apd |char* |pv_display |NN SV *dsv|NN char *pv|STRLEN cur|STRLEN len \
|STRLEN pvlim
+Apd |char* |pv_escape |NN SV *dsv|NN U8 const * const str\
+ |const STRLEN count|const STRLEN max\
+ |NULLOK STRLEN * const escaped\
+ |const U32 flags
+Apd |char* |pv_pretty |NN SV *dsv|NN U8 const * const str\
+ |const STRLEN count|const STRLEN max\
+ |NULLOK U8 const * const start_color\
+ |NULLOK U8 const * const end_color\
+ |const U32 flags
Afp |void |dump_indent |I32 level|NN PerlIO *file|NN const char*
pat|...
Ap |void |dump_vindent |I32 level|NN PerlIO *file|NN const char* pat \
|NULLOK va_list *args
==== //depot/maint-5.8/perl/embed.h#149 (text+w) ====
Index: perl/embed.h
--- perl/embed.h#148~30007~ 2007-01-26 05:50:55.000000000 -0800
+++ perl/embed.h 2007-01-26 11:23:54.000000000 -0800
@@ -991,6 +991,8 @@
#define sv_setsv_mg Perl_sv_setsv_mg
#define get_vtbl Perl_get_vtbl
#define pv_display Perl_pv_display
+#define pv_escape Perl_pv_escape
+#define pv_pretty Perl_pv_pretty
#define dump_indent Perl_dump_indent
#define dump_vindent Perl_dump_vindent
#define do_gv_dump Perl_do_gv_dump
@@ -1315,6 +1317,7 @@
#define to_utf8_substr S_to_utf8_substr
#define to_byte_substr S_to_byte_substr
#endif
+#define debug_start_match S_debug_start_match
#endif
#if defined(PERL_IN_DUMP_C) || defined(PERL_DECL_PROT)
#ifdef PERL_CORE
@@ -3085,6 +3088,8 @@
#define sv_setsv_mg(a,b) Perl_sv_setsv_mg(aTHX_ a,b)
#define get_vtbl(a) Perl_get_vtbl(aTHX_ a)
#define pv_display(a,b,c,d,e) Perl_pv_display(aTHX_ a,b,c,d,e)
+#define pv_escape(a,b,c,d,e,f) Perl_pv_escape(aTHX_ a,b,c,d,e,f)
+#define pv_pretty(a,b,c,d,e,f,g) Perl_pv_pretty(aTHX_ a,b,c,d,e,f,g)
#define dump_vindent(a,b,c,d) Perl_dump_vindent(aTHX_ a,b,c,d)
#define do_gv_dump(a,b,c,d) Perl_do_gv_dump(aTHX_ a,b,c,d)
#define do_gvgv_dump(a,b,c,d) Perl_do_gvgv_dump(aTHX_ a,b,c,d)
==== //depot/maint-5.8/perl/global.sym#58 (text+w) ====
Index: perl/global.sym
--- perl/global.sym#57~30020~ 2007-01-26 11:03:49.000000000 -0800
+++ perl/global.sym 2007-01-26 11:23:54.000000000 -0800
@@ -592,6 +592,8 @@
Perl_sv_usepvn_mg
Perl_get_vtbl
Perl_pv_display
+Perl_pv_escape
+Perl_pv_pretty
Perl_dump_indent
Perl_dump_vindent
Perl_do_gv_dump
==== //depot/maint-5.8/perl/perl.h#142 (text) ====
Index: perl/perl.h
--- perl/perl.h#141~30007~ 2007-01-26 05:50:55.000000000 -0800
+++ perl/perl.h 2007-01-26 11:23:54.000000000 -0800
@@ -5588,6 +5588,33 @@
so that Configure picks them up. */
+/* These are used by Perl_pv_escape() and Perl_pv_pretty()
+ * are here so that they are available throughout the core
+ * NOTE that even though some are for _escape and some for _pretty
+ * there must not be any clashes as the flags from _pretty are
+ * passed straight through to _escape.
+ */
+
+#define PERL_PV_ESCAPE_QUOTE 0x0001
+#define PERL_PV_PRETTY_QUOTE PERL_PV_ESCAPE_QUOTE
+
+
+#define PERL_PV_PRETTY_ELIPSES 0x0002
+#define PERL_PV_PRETTY_LTGT 0x0004
+
+#define PERL_PV_ESCAPE_FIRSTCHAR 0x0008
+
+#define PERL_PV_ESCAPE_UNI 0x0100
+#define PERL_PV_ESCAPE_UNI_DETECT 0x0200
+
+#define PERL_PV_ESCAPE_ALL 0x1000
+#define PERL_PV_ESCAPE_NOBACKSLASH 0x2000
+#define PERL_PV_ESCAPE_NOCLEAR 0x4000
+
+/* used by pv_display in dump.c*/
+#define PERL_PV_PRETTY_DUMP PERL_PV_PRETTY_ELIPSES|PERL_PV_PRETTY_QUOTE
+#define PERL_PV_PRETTY_REGPROP PERL_PV_PRETTY_ELIPSES|PERL_PV_PRETTY_LTGT
+
/* Source code compatibility cruft:
PERL_XS_APIVERSION is not used, and has been superseded by inc_version_list
It and PERL_PM_APIVERSION are retained for source compatibility in the
==== //depot/maint-5.8/perl/pod/perlapi.pod#93 (text+w) ====
Index: perl/pod/perlapi.pod
--- perl/pod/perlapi.pod#92~29997~ 2007-01-26 02:30:23.000000000 -0800
+++ perl/pod/perlapi.pod 2007-01-26 11:23:54.000000000 -0800
@@ -748,6 +748,62 @@
=back
+=head1 Functions in file dump.c
+
+
+=over 8
+
+=item pv_display
+X<pv_display>
+
+ char *pv_display(SV *dsv, const char *pv, STRLEN cur, STRLEN len,
+ STRLEN pvlim, U32 flags)
+
+Similar to
+
+ pv_escape(dsv,pv,cur,pvlim,PERL_PV_ESCAPE_QUOTE);
+
+except that an additional "\0" will be appended to the string when
+len > cur and pv[cur] is "\0".
+
+Note that the final string may be up to 7 chars longer than pvlim.
+
+ char* pv_display(SV *dsv, const char *pv, STRLEN cur, STRLEN len,
STRLEN pvlim)
+
+=for hackers
+Found in file dump.c
+
+=item pv_escape
+X<pv_escape>
+
+Escapes at most the first "count" chars of pv and puts the results into
+buf such that the size of the escaped string will not exceed "max" chars
+and will not contain any incomplete escape sequences.
+
+If flags contains PERL_PV_ESCAPE_QUOTE then the string will have quotes
+placed around it; moreover, if the number of chars converted was less than
+"count" then a trailing elipses (...) will be added after the closing
+quote.
+
+If PERL_PV_ESCAPE_QUOTE is not set, but PERL_PV_ESCAPE_PADR is, then the
+returned string will be right padded with spaces such that it is max chars
+long.
+
+Normally the SV will be cleared before the escaped string is prepared,
+but when PERL_PV_ESCAPE_CAT is set this will not occur.
+
+Returns a pointer to the string contained by SV.
+
+NOTE: the perl_ form of this function is deprecated.
+
+ char* pv_escape(SV *dsv, const char *pv, const STRLEN count, const
STRLEN max, const U32 flags)
+
+=for hackers
+Found in file dump.c
+
+
+=back
+
=head1 Functions in file mathoms.c
==== //depot/maint-5.8/perl/proto.h#189 (text+w) ====
Index: perl/proto.h
--- perl/proto.h#188~30020~ 2007-01-26 11:03:49.000000000 -0800
+++ perl/proto.h 2007-01-26 11:23:54.000000000 -0800
@@ -1410,6 +1410,8 @@
__attribute__warn_unused_result__;
PERL_CALLCONV char* Perl_pv_display(pTHX_ SV *dsv, char *pv, STRLEN cur,
STRLEN len, STRLEN pvlim);
+PERL_CALLCONV char* Perl_pv_escape(pTHX_ SV *dsv, U8 const * const str,
const STRLEN count, const STRLEN max, STRLEN * const escaped, const U32 flags);
+PERL_CALLCONV char* Perl_pv_pretty(pTHX_ SV *dsv, U8 const * const str,
const STRLEN count, const STRLEN max, U8 const * const start_color, U8 const *
const end_color, const U32 flags);
PERL_CALLCONV void Perl_dump_indent(pTHX_ I32 level, PerlIO *file, const
char* pat, ...)
__attribute__format__(__printf__,pTHX_3,pTHX_4);
End of Patch.