On Mon, Jan 8, 2024 at 10:49 AM Ruediger Pluem <[email protected]> wrote:
>
> On 1/5/24 3:08 PM, Yann Ylavic wrote:
> >
> > process_regexp.diff
> >
> > Index: modules/metadata/mod_headers.c
> > ===================================================================
> > --- modules/metadata/mod_headers.c (revision 1914804)
> > +++ modules/metadata/mod_headers.c (working copy)
> > @@ -622,41 +622,70 @@ static char* process_tags(header_entry *hdr, reque
> > }
> > return str ? str : "";
> > }
> > -static const char *process_regexp(header_entry *hdr, const char *value,
> > - request_rec *r)
> > +
> > +static const char *process_regexp_rec(header_entry *hdr, const char *value,
> > + request_rec *r, ap_regmatch_t
> > pmatch[],
> > + int flags)
> > {
> > - ap_regmatch_t pmatch[AP_MAX_REG_MATCH];
> > - const char *subs;
> > - const char *remainder;
> > char *ret;
> > - int diffsz;
> > - if (ap_regexec(hdr->regex, value, AP_MAX_REG_MATCH, pmatch, 0)) {
> > + const char *subs, *remainder;
> > + apr_size_t val_len, subs_len, rem_len;
> > + apr_size_t match_start, match_end;
> > +
> > + val_len = strlen(value);
> > + if (ap_regexec_len(hdr->regex, value, val_len,
> > + AP_MAX_REG_MATCH, pmatch,
> > + flags)) {
> > /* no match, nothing to do */
> > return value;
> > }
> > +
> > /* Process tags in the input string rather than the resulting
> > * substitution to avoid surprises
> > */
> > - subs = ap_pregsub(r->pool, process_tags(hdr, r), value,
> > AP_MAX_REG_MATCH, pmatch);
> > + subs = ap_pregsub(r->pool, process_tags(hdr, r), value,
> > + AP_MAX_REG_MATCH, pmatch);
> > if (subs == NULL)
> > return NULL;
> > - diffsz = strlen(subs) - (pmatch[0].rm_eo - pmatch[0].rm_so);
> > + subs_len = strlen(subs);
> > +
> > + ap_assert(pmatch[0].rm_so >= 0 && pmatch[0].rm_so <= pmatch[0].rm_eo);
> > + match_start = pmatch[0].rm_so;
> > + match_end = pmatch[0].rm_eo;
> > if (hdr->action == hdr_edit) {
> > - remainder = value + pmatch[0].rm_eo;
> > + remainder = value + match_end;
> > }
> > else { /* recurse to edit multiple matches if applicable */
> > - remainder = process_regexp(hdr, value + pmatch[0].rm_eo, r);
> > - if (remainder == NULL)
> > - return NULL;
> > - diffsz += strlen(remainder) - strlen(value + pmatch[0].rm_eo);
> > + if (match_end == 0 && val_len) {
> > + /* advance on empty match to avoid infinite recursion */
> > + match_start = match_end = 1;
>
> Not debating that
>
> Header edit* headername ^ prefix
>
> should be be
>
> Header edit headername ^ prefix
>
> but wouldn't that do the wrong thing and add the prefix *after* the first
> character of the header value?
Yes correct, this patch won't set the skipped char at the right place finally.
New v2 attached.
>
> > + }
> > + if (match_end < val_len) {
> > + remainder = process_regexp_rec(hdr, value + match_end,
>
> Shouldn't we increase match_end just here by 1 if match_end == 0 && val_len ?
See v2, match_end is not used below anyway.
>
> > + r, pmatch, AP_REG_NOTBOL);
As noted in v2 we have an issue here by "losing" the beginning of the
value on recursion:
/* XXX: recursing by using AP_REG_NOTBOL (because we are not at ^
* anymore) and then "losing" the beginning of the string is not
* always correct. Say we match "(?<=a)ba" against "ababa", on
* recursion ap_regexec_len() will not know that the second "b" is
* preceded by "a" thus not match. We'd need a new ap_regexec_ex()
* that can take match_end as an offset to fix this..
*/
Not sure how far we should go with this patch..
Regards;
Yann.
Index: modules/metadata/mod_headers.c
===================================================================
--- modules/metadata/mod_headers.c (revision 1914804)
+++ modules/metadata/mod_headers.c (working copy)
@@ -622,41 +622,90 @@ static char* process_tags(header_entry *hdr, reque
}
return str ? str : "";
}
-static const char *process_regexp(header_entry *hdr, const char *value,
- request_rec *r)
+
+static const char *process_regexp_core(header_entry *hdr,
+ const char *val, apr_size_t val_len,
+ request_rec *r, ap_regmatch_t *pmatch,
+ int flags)
{
- ap_regmatch_t pmatch[AP_MAX_REG_MATCH];
- const char *subs;
- const char *remainder;
char *ret;
- int diffsz;
- if (ap_regexec(hdr->regex, value, AP_MAX_REG_MATCH, pmatch, 0)) {
+ const char *sub, *rem;
+ apr_size_t sub_len, rem_sz;
+ apr_size_t match_start, match_end;
+ char skip = 0;
+
+ if (ap_regexec_len(hdr->regex, val, val_len,
+ AP_MAX_REG_MATCH, pmatch, flags)) {
/* no match, nothing to do */
- return value;
+ return val;
}
+
/* Process tags in the input string rather than the resulting
* substitution to avoid surprises
*/
- subs = ap_pregsub(r->pool, process_tags(hdr, r), value, AP_MAX_REG_MATCH, pmatch);
- if (subs == NULL)
+ sub = ap_pregsub(r->pool, process_tags(hdr, r), val,
+ AP_MAX_REG_MATCH, pmatch);
+ if (sub == NULL)
return NULL;
- diffsz = strlen(subs) - (pmatch[0].rm_eo - pmatch[0].rm_so);
+ sub_len = strlen(sub);
+
+ ap_assert(pmatch[0].rm_so >= 0 && pmatch[0].rm_so <= pmatch[0].rm_eo);
+ match_start = pmatch[0].rm_so;
+ match_end = pmatch[0].rm_eo;
if (hdr->action == hdr_edit) {
- remainder = value + pmatch[0].rm_eo;
+ rem = val + match_end;
}
else { /* recurse to edit multiple matches if applicable */
- remainder = process_regexp(hdr, value + pmatch[0].rm_eo, r);
- if (remainder == NULL)
- return NULL;
- diffsz += strlen(remainder) - strlen(value + pmatch[0].rm_eo);
+ /* On empty match always advance by one char to avoid an infinite
+ * recursion, but save the skipped char which should go after the
+ * substitution still.
+ */
+ if (val_len && !match_end) {
+ match_end = 1;
+ skip = *val;
+ }
+ /* If skipping the char reaches the end of string we still want to
+ * run the last recursion at $ since it might match and substitute
+ * something.
+ */
+ if (match_end < val_len || skip) {
+ /* XXX: recursing by using AP_REG_NOTBOL (because we are not at ^
+ * anymore) and then "losing" the beginning of the string is not
+ * always correct. Say we match "(?<=a)ba" against "ababa", on
+ * recursion ap_regexec_len() will not know that the second "b" is
+ * preceded by "a" thus not match. We'd need a new ap_regexec_ex()
+ * that can take match_end as an offset to fix this..
+ */
+ rem = process_regexp_core(hdr,
+ val + match_end, val_len - match_end,
+ r, pmatch, AP_REG_NOTBOL);
+ if (rem == NULL)
+ return NULL;
+ }
+ else {
+ rem = "";
+ }
}
- ret = apr_palloc(r->pool, strlen(value) + 1 + diffsz);
- memcpy(ret, value, pmatch[0].rm_so);
- strcpy(ret + pmatch[0].rm_so, subs);
- strcat(ret, remainder);
+ rem_sz = strlen(rem) + 1; /* includes \0 */
+
+ ret = apr_palloc(r->pool, match_start + sub_len + (skip ? 1 : 0) + rem_sz);
+ memcpy(ret, val, match_start);
+ memcpy(ret + match_start, sub, sub_len);
+ if (skip) {
+ ret[match_start + sub_len] = skip;
+ sub_len++;
+ }
+ memcpy(ret + match_start + sub_len, rem, rem_sz);
return ret;
}
+static const char *process_regexp(header_entry *hdr, const char *value,
+ request_rec *r)
+{
+ ap_regmatch_t pmatch[AP_MAX_REG_MATCH];
+ return process_regexp_core(hdr, value, strlen(value), r, pmatch, 0);
+}
+
static int echo_header(void *v, const char *key, const char *val)
{
echo_do *ed = (echo_do *)v;