testbuffer.c

Graham Leggett via dev Wed, 24 Apr 2024 10:51:51 -0700

On 24 Apr 2024, at 14:03, Yann Ylavic <ylavic....@gmail.com> wrote:

>> -#define APR_BUFFER_MAX APR_SIZE_MAX/2
>> +#define APR_BUFFER_MAX (APR_SIZE_MAX/2-1)
> 
> It seems that the ->size does include the terminating NUL byte so
> APR_BUFFER_MAX could be APR_SIZE_MAX/2 no? Why -1 here?
> Also maybe ->size should be ->len then?


The size doesn't include the terminating NUL byte, that went away when the 
internal size became unsigned.

Thinking about this more, the size restriction on size only applies inside 
apr_buffer, no such restriction exists on the full sized apr_size_t passed to 
apr_palloc().

The -1 isn't necessary in this case. 

>> @@ -74,21 +76,25 @@ APR_DECLARE(apr_status_t) apr_buffer_str
> 
> Here in apr_buffer_str_set() we still have apr_ssize_t len, please
> make it an apr_size_t for the reason explained in the other thread
> (also the apr_buffer_mem_*() ones use apr_size_t already so it should
> be consistent)?

The apr_ssize_t is intended in this case.

As per the docs, an apr_ssize_t of -1 means compute the length:

https://github.com/apache/apr/blob/trunk/include/apr_buffer.h#L46

/**
 * When passing a string to apr_buffer_str_make, this value can be
 * passed to indicate a string with unknown length, and have apr_buffer_str_make
 * compute the length automatically.
 */
#define APR_BUFFER_STRING     (-1)

https://github.com/apache/apr/blob/trunk/include/apr_buffer.h#L125

 * @param len The length of the string without terminating zero, or
 * APR_BUFFER_STRING to have the length calculated.

The definition of ssize_t is "unsigned length or -1", so we're using it 
correctly.

The purpose of the apr_buffer is to make a stark and impossible to mix up 
distinction between a string and a memory area, apr_buffer_src() and 
apr_buffer_mem() definitely shouldn't be the same.

> 
>>     if (!str) {
>>         buf->d.str = NULL;
>>         buf->size = 0;
>> +        buf->zero_terminated = 0;
>>     }
>>     else if (len < 0) {
>>         apr_size_t slen = strlen(str);
>>         if (slen <= APR_BUFFER_MAX) {
>>             buf->d.str = str;
>> -            buf->size = -(apr_off_t)(slen + 1);
>> +            buf->size = slen;
>> +            buf->zero_terminated = 1;
>>         }
>>         else {
>>             buf->d.str = NULL;
>>             buf->size = 0;
>> +            buf->zero_terminated = 0;
> 
> return APR_EINVAL?

A NULL buffer is valid and intended.

https://github.com/apache/apr/blob/trunk/include/apr_buffer.h#L162

/**
 * Does the buffer contain a NULL buffer.
 *
 * If the internal buffer is NULL, 1 is returned, otherwise 0.
 *
 * @param buf The buffer.
 * @return Returns 1 if buffer is null, otherwise 0.
 */
APR_DECLARE(int) apr_buffer_is_null(const apr_buffer_t *buf)
                                    __attribute__((nonnull(1)));

>>         }
>>     }
>>     else {
>>         buf->d.str = str;
>> -        buf->size = -(len + 1);
>> +        buf->size = len;
>> +        buf->zero_terminated = 1;
> 
> Shouldn't we check that str[len] == '\0' here? Though if it's not the
> check might be out of bounds, but OTOH it's not a string buffer and
> ->zero_terminated is not true..

Yes, we should.

It's a great chance to catch bogus strings.

>>     }
>> 
>>     return APR_SUCCESS;
> 
> Overall I think it could be simplified as:
> 
> APR_DECLARE(apr_status_t) apr_buffer_str_set(apr_buffer_t *buf,
>                                             char *str, apr_size_t len)
> {
>    if (len == APR_BUFFER_STRING) {
>        len = str ? strlen(str) : 0;
>    }
>    else if (str ? str[len] != '\0' : len != 0) {
>        return APR_EINVAL;
>    }
>    if (len > APR_BUFFER_MAX) {
>        return APR_EINVAL; /* or APR_ENOSPC? */
>    }
> 
>    buf->d.str = str;
>    buf->size = len;
>    buf->zero_terminated = 1;
> 
>    return APR_SUCCESS;
> }

This breaks when src is NULL.

>> @@ -100,25 +106,25 @@ APR_DECLARE(apr_buffer_t *) apr_buffer_s
> 
> Likewise apr_ssize_t => apr_size_t len for apr_buffer_str_make(),
> APR_BUFFER_STRING and co should be unsigned (e.g. APR_SIZE_MAX) too.

We already use -1 to mean "compute the length" in various parts of APR, such as 
apr_hash. We should keep the behaviour the same as the rest of APR.

If "compute the length" was APR_SIZE_MAX, we couldn't distinguish between 
"compute the length" and "too big".

>>     apr_buffer_t *buf;
>>     apr_int64_t size;
>>     apr_size_t slen;
>> +    unsigned int zero_terminated;
>> 
>>     if (!str) {
>>         str = NULL;
>>         size = 0;
>> +        zero_terminated = 0;
>>     }
>> -    if (APR_BUFFER_STRING == len && !str[0]) {
>> -        size = len;
>> -    }
>> -    else if (len < 0) {
>> +    if (APR_BUFFER_STRING == len) {
>>         slen = strlen(str);
>>         if (slen <= APR_BUFFER_MAX) {
>> -            size = -(apr_off_t)(slen + 1);
>> +            size = slen;
>> +            zero_terminated = 1;
>>         }
>>         else {
>>             return NULL;
>>         }
>>     }
>>     else {
>> -        size = -(len + 1);
>> +        size = (apr_size_t)len;
>>     }
>> 
>>     buf = apr_palloc(pool, sizeof(apr_buffer_t));
>> @@ -126,6 +132,7 @@ APR_DECLARE(apr_buffer_t *) apr_buffer_s
>>     if (buf) {
>>         buf->d.str = str;
>>         buf->size = size;
>> +        buf->zero_terminated = zero_terminated;
>>     }
>> 
>>     return buf;
> 
> Since there could be multiple error cases for apr_buffer_str_make()
> too, I think we cannot return NULL so rather than a _make() we need a
> _create() like:
> 
> APR_DECLARE(apr_status_t) apr_buffer_str_create(apr_pool_t *pool,
>                                                apr_buffer_t **pbuf,
>                                                char *str, apr_size_t len)
> {
>    *pbuf = apr_palloc(pool, sizeof(apr_buffer_t));
> 
>    return *pbuf ? apr_buffer_str_set(*pbuf, str, len) : APR_ENOMEM;
> }
> 
> ?

Hmmm... we have both APR_ENOMEM and a possible APR_EINVAL if a 
non-zero-terminated-string is passed in, so this makes sense.

>> 
>> APR_DECLARE(apr_size_t) apr_buffer_allocated(const apr_buffer_t *buf)
> 
> apr_buffer_size() if ->size is changed to ->len ?

apr_buffer_allocated() means physical size, apr_buffer_len() means logical 
length. One you care about when you're interested in the logical data, the 
other you care about when you're copying the physical data, for example when 
you're writing it to shared memory or some other system with special 
requirements.

>> {
>> -    if (buf->size < 0) {
>> -        return (apr_size_t)((-buf->size));
>> -    }
>> -    else {
>> -        return (apr_size_t)buf->size;
>> -    }
>> +    return buf->size + buf->zero_terminated;
>> }
> 
>> 
>> APR_DECLARE(void *) apr_buffer_mem(const apr_buffer_t *buf, apr_size_t *size)
>> @@ -227,11 +214,12 @@ APR_DECLARE(apr_buffer_t *) apr_buffer_a
>>     for (i = 0; i < nelts; i++) {
>> 
>>         /* absolute value is size of mem buffer including optional 
>> terminating zero */
>> -        apr_size_t size = src->size < 0 ? -src->size : src->size;
>> +        apr_size_t size = src->size + src->zero_terminated;
>> 
>>         void *mem = alloc(ctx, size);
> 
> Check mem == NULL => APR_ENOMEM ?

Definitely yes.

>>         memcpy(mem, src->d.mem, size);
>> 
>> +        dst->zero_terminated = src->zero_terminated;
>>         dst->size = src->size;
>>         dst->d.mem = mem;
> 
>> 
>> @@ -256,22 +244,25 @@ APR_DECLARE(apr_buffer_t *) apr_buffer_c
>> 
>>         dst->d.mem = NULL;
>>         dst->size = 0;
>> +        dst->zero_terminated = 0;
>> 
>>     }
>>     else if (!alloc) {
>> 
>>         dst->d.mem = src->d.mem;
>>         dst->size = src->size;
>> -
>> +        dst->zero_terminated = src->zero_terminated;
>> +
>>     }
>>     else {
>> 
>>         /* absolute value is size of mem buffer including optional 
>> terminating zero */
>> -        apr_size_t size = src->size < 0 ? -src->size : src->size;
>> +        apr_size_t size = src->size + src->zero_terminated;
>> 
>>         void *mem = alloc(ctx, size);
>>         memcpy(mem, src->d.mem, size);
>> 
>> +        dst->zero_terminated = src->zero_terminated;
>>         dst->size = src->size;
>>         dst->d.mem = mem;
> 
> Maybe we shouldn't check/accept src == NULL (and let it crash), and
> alloc == NULL (it's a simple memcpy functionally, the user can do it
> already), so just do the deep copy (apr_buffer_copy() =>
> apr_buffer_clone() ?)

We need the ability to store a NULL in a buffer, that's a valid value.

>> @@ -280,33 +271,7 @@ APR_DECLARE(apr_buffer_t *) apr_buffer_c
>>     return dst;
>> }
>> 
>> -
>> APR_DECLARE(int) apr_buffer_cmp(const apr_buffer_t *src,
>> -                                const apr_buffer_t *dst)
>> -{
>> -    apr_size_t slen = apr_buffer_len(src);
>> -    apr_size_t dlen = apr_buffer_len(dst);
>> -
>> -    if (slen != dlen) {
>> -        return slen < dlen ? -1 : 1;
>> -    }
>> -    else if (src->d.mem == dst->d.mem) {
>> -        /* identical data or both NULL */
>> -        return 0;
>> -    }
>> -    else if (!src->d.mem) {
>> -        return -1;
>> -    }
>> -    else if (!dst->d.mem) {
>> -        return 1;
>> -    }
>> -    else {
>> -        return memcmp(src->d.mem, dst->d.mem, slen);
>> -    }
>> -}
>> -
>> -
>> -APR_DECLARE(int) apr_buffer_ncmp(const apr_buffer_t *src,
>>                                  const apr_buffer_t *dst)
>> {
>>     if (!src || !src->d.mem) {
>> @@ -322,7 +287,17 @@ APR_DECLARE(int) apr_buffer_ncmp(const a
>>             return 1;
>>         }
>>         else {
>> -            return apr_buffer_cmp(src, dst);
>> +
>> +            apr_size_t slen = apr_buffer_len(src);
>> +            apr_size_t dlen = apr_buffer_len(dst);
>> +
>> +            if (slen != dlen) {
>> +                return slen < dlen ? -1 : 1;
>> +            }
>> +            else {
>> +                return memcmp(src->d.mem, dst->d.mem, slen);
>> +            }
>> +
>>         }
>>     }
>> }
> 
> APR_DECLARE(int) apr_buffer_cmp(const apr_buffer_t *src,
>                                const apr_buffer_t *dst)
> {
>    apr_size_t slen, dlen;
> 
>    if (!src) {
>        return dst ? 1 : 0;
>    }
>    if (!dst) {
>        return -1;
>    }
>    if (src->size != dst->size) {
>        return src->size < dst->size ? -1 : 1;
>    }
> 
>    return memcmp(src->d.mem, dst->d.mem, slen);
> }
> 
> Though I still don't think that we should handle NULLs here (let it crash..).

NULL support is intended.

>> @@ -344,17 +319,17 @@ APR_DECLARE(char *) apr_buffer_pstrncat(
>>             size += seplen;
>>         }
>> 
>> -        if (src->size < 0) {
>> -            size += (apr_size_t)(-src->size) - 1;
>> +        if (src->zero_terminated) {
>> +            size += src->size;
> 
> Why not honor APR_BUFFER_BASE64 if it's a string buffer?

Base64 is something you do to binary data, not strings - type safety is the 
point behind the api.

The LDAP api returns data as either a string, or a binary blob. When you're 
trying to log buffers, you want the string to stay a string, and the binary 
blob to become a string safely.

When you're tasked with creating an LDAP filter, you want your strings to stay 
strings, and your binary blobs to be base64 encoded.

The plan is to support the whole range of encodings, because we can.

> 
>>         }
>>         else {
>>             if (APR_BUFFER_NONE == flags) {
> 
> s/APR_BUFFER_NONE/APR_BUFFER_PLAIN/ ?

Hmmm... this does make sense.

>> -                size += (apr_size_t)src->size;
>> +                size += src->size;
>>             }
>>             else if (APR_BUFFER_BASE64 == flags) {
>>                 apr_size_t b64len;
>> 
>> -                if (APR_SUCCESS != apr_encode_base64(NULL, src->d.mem, 
>> (apr_size_t)src->size,
>> +                if (APR_SUCCESS != apr_encode_base64(NULL, src->d.mem, 
>> src->size,
>>                                                      APR_ENCODE_NONE, 
>> &b64len)) {
>>                     return NULL;
>>                 }
>> @@ -379,19 +354,19 @@ APR_DECLARE(char *) apr_buffer_pstrncat(
>>             strncpy(dst, sep, seplen);
>>             dst += seplen;
>>         }
>> -
>> -        if (src->size < 0) {
>> -            strncpy(dst, src->d.str, (apr_size_t)((-src->size) - 1));
>> -            dst += (-src->size) - 1;
>> +
>> +        if (src->zero_terminated) {
>> +            strncpy(dst, src->d.str, src->size);
>> +            dst += src->size;
>>         }
>>         else {
>>             if (APR_BUFFER_NONE == flags) {
>> -                memcpy(dst, src->d.mem, (apr_size_t)src->size);
>> +                memcpy(dst, src->d.mem, src->size);
>>             }
>>             else if (APR_BUFFER_BASE64 == flags) {
>>                 apr_size_t b64len;
>> 
>> -                if (APR_SUCCESS != apr_encode_base64(dst, src->d.mem, 
>> (apr_size_t)src->size,
>> +                if (APR_SUCCESS != apr_encode_base64(dst, src->d.mem, 
>> src->size,
>>                                                      APR_ENCODE_NONE, 
>> &b64len)) {
>>                     return NULL;
>>                 }
> 
>> 
>> --- apr/apr/trunk/include/apr_buffer.h (original)
>> +++ apr/apr/trunk/include/apr_buffer.h Mon Apr 22 12:46:37 2024
>> @@ -73,13 +73,17 @@ typedef struct
>>         void *mem;
>>     } d;
>> 
>> -    /** size of the data. If positive, the data is of fixed size. If
>> -      * negative, the data is zero terminated and the absolute value
>> -      * represents the data length including terminating zero.
>> -      *
>> -      * we use apr_off_t to make it simple to detect overflow.
>> -      */
>> -    apr_off_t size;
>> +    /** is the data zero terminated */
>> +    unsigned int zero_terminated:1;
>> +
>> +    /** size of the data, excluding terminating zero */
>> +#if defined(SIZE_MAX) && SIZE_MAX == APR_UINT64_MAX
>> +    apr_size_t size:63;
>> +#elif defined(SIZE_MAX) && SIZE_MAX == APR_UINT32_MAX
>> +    apr_size_t size:31;
>> +#else
>> +#error sizeof size_t is neither 64 nor 32 bit (SIZE_MAX not defined)
>> +#endif
> 
> Is sizeof(apr_buffer_t) still 8 (32bit) or 16 (64bit) with
> ->zero_terminated and ->size in two bitmaps of different type/width
> (unsigned int for the former, apr_size_t for the latter)?
> It would be simpler to have both use apr_size_t IMHO.

I went looking for the formal definition of this, and all I could find is that 
bits are bits.

In theory there should be no need for a 1 bit wide field and a 31 bit wide 
field to have the same type, both will pack to a total of 32 bits.

Regards,
Graham
--

Re: svn commit: r1917266 - in /apr/apr/trunk: buffer/apr_buffer.c include/apr_buffer.h test/testbuffer.c

Reply via email to