Hi William,

A few comments below.

On Tue, Jan 05, 2021 at 12:08:58PM +0100, William Dauchy wrote:
> diff --git a/src/http_conv.c b/src/http_conv.c
> index 4afa6a2fd..0fded0cc7 100644
> --- a/src/http_conv.c
> +++ b/src/http_conv.c
> @@ -268,6 +268,83 @@ static int sample_conv_url_dec(const struct arg *args, 
> struct sample *smp, void
>       return 1;
>  }
>  
> +/* Check url-encode type */
> +enum encode_type {
> +     ENC_QUERY = 0,
> +};

Please leave a blank line here between the end of the enum and the function.

> +static int sample_conv_url_enc_check(struct arg *arg, struct sample_conv 
> *conv,
> +                                  const char *file, int line, char **err)
> +{
> +     enum encode_type enc_type;
> +
> +     enc_type = ENC_QUERY;
> +     if (!arg) {
> +             memprintf(err, "Unexpected empty arg list");
> +             return 0;
> +     }

As much as I remember, check functions are never called with a NULL args
list, it's always an array, so you can drop this test.

> +     if (arg->type != ARGT_STR) {
> +             memprintf(err, "Unexpected arg type for encoding");
> +             return 0;
> +     }

The expression parser guarantees that you can't have anything else, because
you mentioned your converter takes a mandatory string argument. Thus this
test can be removed as well.

> +
> +     if (arg && arg->type == ARGT_STR) {

This test is redundant with to the two previous ones. And since we know
they can never happen, you can simply remove the test and leave the code
below out of the block.

> +             if (strcmp(arg->data.str.area, "") == 0)
> +                     enc_type = ENC_QUERY;
> +             else if (strcmp(arg->data.str.area, "query") == 0)
> +                     enc_type = ENC_QUERY;
> +             else {
> +                     memprintf(err, "Unexpected encode type. "
> +                               "Allowed value is 'query'");
> +                     return 0;
> +             }
> +     }
> +
> +     chunk_destroy(&arg->data.str);

I didn't remember we needed to do this, I had to recheck :-)

> +     arg->type = ARGT_SINT;
> +     arg->data.sint = enc_type;
> +     return 1;
> +}
> +
> +/* This fetch url-encode any input string. Only support query string for now 
> */
> +static int sample_conv_url_enc(const struct arg *args, struct sample *smp, 
> void
> +             *private)
> +{
> +     enum encode_type enc_type;
> +     struct buffer *trash = get_trash_chunk();
> +     long url_encode_map[(256 / 8) / sizeof(long)];
> +     char *ret;
> +     int i;
> +
> +     enc_type = ENC_QUERY;
> +     if (args)
> +             enc_type = args->data.sint;
> +
> +     /* Add final \0 required by encode_string() */
> +     smp->data.u.str.area[smp->data.u.str.data] = '\0';
> +
> +     memset(url_encode_map, 0, sizeof(url_encode_map));
> +     if (enc_type == ENC_QUERY) {
> +             /* use rfc3986 to determine list of characters to keep 
> unchanged for
> +              * query string */
> +             for (i = 0; i < 256; i++) {
> +                     if (!((i >= 'a' && i <= 'z') || (i >= 'A' && i <= 'Z')
> +                         || (i >= '0' && i <= '9') ||
> +                         i == '-' || i == '.' || i == '_' || i == '~'))
> +                             ha_bit_set(i, url_encode_map);
> +             }

No, this is not a good idea, you're doing this in the stack for every
single call. Better put it outside, as a global variable, and initalize
it in an initcall function. I'm even seeing that we have similarly named
variables in log.c, maybe you can call your variable query_encode_map()
or url_enc_query_map() or anything else that makes sense to you.

> +     } else {
> +             return 0;
> +     }
> +     ret = encode_string(trash->area, trash->area + trash->size, '%',
> +                         url_encode_map, smp->data.u.str.area);
> +     if (ret == NULL || *ret != '\0')
> +             return 0;
> +     trash->data = strlen(trash->area);

>From what I'm seeing from the encode_string() function, it returns the
pointer to the trailing zero so that you don't have to run a costly
strlen() over it, thus you can simply write:

       trash->data = ret - trash->area;

> +     smp->data.u.str = *trash;
> +     return 1;
> +}

Thanks!
Willy

Reply via email to