Hi William,
On Mon, 30 May 2016, William Duclot wrote:
> It is unfortunate that it is currently impossible to use a strbuf
> without doing a memory allocation. So code like
>
> void f()
> {
> char path[PATH_MAX];
> ...
> }
>
> typically gets turned into either
>
> void f()
> {
> struct strbuf path;
> strbuf_add(&path, ...); <-- does a malloc
> ...
> strbuf_release(&path); <-- does a free
> }
>
> which costs extra memory allocations, or
>
> void f()
> {
> static struct strbuf path;
> strbuf_add(&path, ...);
> ...
> strbuf_setlen(&path, 0);
> }
>
> which, by using a static variable, avoids most of the malloc/free
> overhead, but makes the function unsafe to use recursively or from
> multiple threads. Those limitations prevent strbuf to be used in
> performance-critical operations.
This description is nice and verbose, but maybe something like this would
introduce the subject in a quicker manner?
When working e.g. with file paths or with dates, strbuf's
malloc()/free() dance of strbufs can be easily avoided: as
a sensible initial buffer size is already known, it can be
allocated on the heap.
The rest of the commit message flows nicely.
> diff --git a/strbuf.c b/strbuf.c
> index 1ba600b..527b986 100644
> --- a/strbuf.c
> +++ b/strbuf.c
> @@ -1,6 +1,14 @@
> #include "cache.h"
> #include "refs.h"
> #include "utf8.h"
> +#include <sys/param.h>
Why?
> +/**
> + * Flags
> + * --------------
> + */
> +#define STRBUF_OWNS_MEMORY 1
> +#define STRBUF_FIXED_MEMORY (1 << 1)
>From reading the commit message, I expected STRBUF_OWNS_MEMORY.
STRBUF_FIXED_MEMORY still needs to be explained.
> @@ -20,16 +28,37 @@ char strbuf_slopbuf[1];
>
> void strbuf_init(struct strbuf *sb, size_t hint)
> {
> + sb->flags = 0;
> sb->alloc = sb->len = 0;
> sb->buf = strbuf_slopbuf;
> if (hint)
> strbuf_grow(sb, hint);
> }
>
> +void strbuf_wrap_preallocated(struct strbuf *sb, char *path_buf,
> + size_t path_buf_len, size_t alloc_len)
> +{
> + if (!path_buf)
> + die("you try to use a NULL buffer to initialize a strbuf");
> +
> + strbuf_init(sb, 0);
> + strbuf_attach(sb, path_buf, path_buf_len, alloc_len);
> + sb->flags &= ~STRBUF_OWNS_MEMORY;
> + sb->flags &= ~STRBUF_FIXED_MEMORY;
Shorter: sb->flags &= ~(STRBUF_OWNS_MEMORY | STRBUF_FIXED_MEMORY);
> +}
> +
> +void strbuf_wrap_fixed(struct strbuf *sb, char *path_buf,
> + size_t path_buf_len, size_t alloc_len)
> +{
> + strbuf_wrap_preallocated(sb, path_buf, path_buf_len, alloc_len);
> + sb->flags |= STRBUF_FIXED_MEMORY;
> +}
Rather than letting strbuf_wrap_preallocated() set sb->flags &=
~FIXED_MEMORY only to revert that decision right away, a static function
could be called by both strbuf_wrap_preallocated() and
strbuf_wrap_fixed().
> void strbuf_release(struct strbuf *sb)
> {
> if (sb->alloc) {
> - free(sb->buf);
> + if (sb->flags & STRBUF_OWNS_MEMORY)
> + free(sb->buf);
> strbuf_init(sb, 0);
> }
Should we not reset the flags here, too?
> @@ -38,7 +67,11 @@ char *strbuf_detach(struct strbuf *sb, size_t *sz)
> {
> char *res;
> strbuf_grow(sb, 0);
> - res = sb->buf;
> + if (sb->flags & STRBUF_OWNS_MEMORY)
> + res = sb->buf;
> + else
> + res = xmemdupz(sb->buf, sb->alloc - 1);
This looks like a usage to be avoided: if we plan to detach the buffer,
anyway, there is no good reason to allocate it on the heap first. I would
at least issue a warning here.
> @@ -51,6 +84,8 @@ void strbuf_attach(struct strbuf *sb, void *buf, size_t
> len, size_t alloc)
> sb->buf = buf;
> sb->len = len;
> sb->alloc = alloc;
> + sb->flags |= STRBUF_OWNS_MEMORY;
> + sb->flags &= ~STRBUF_FIXED_MEMORY;
> strbuf_grow(sb, 0);
> sb->buf[sb->len] = '\0';
> }
> @@ -61,9 +96,32 @@ void strbuf_grow(struct strbuf *sb, size_t extra)
> if (unsigned_add_overflows(extra, 1) ||
> unsigned_add_overflows(sb->len, extra + 1))
> die("you want to use way too much memory");
> - if (new_buf)
> - sb->buf = NULL;
> - ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc);
> + if ((sb->flags & STRBUF_FIXED_MEMORY) && sb->len + extra + 1 >
> sb->alloc)
> + die("you try to make a string overflow the buffer of a fixed
> strbuf");
We try to avoid running over 80 columns/row. This message could be
more to the point: cannot grow fixed string
> + /*
> + * ALLOC_GROW may do a realloc() if needed, so we must not use it on
> + * a buffer the strbuf doesn't own
> + */
> + if (sb->flags & STRBUF_OWNS_MEMORY) {
> + if (new_buf)
> + sb->buf = NULL;
> + ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc);
> + } else {
> + /*
> + * The strbuf doesn't own the buffer: to avoid to realloc it,
> + * the strbuf needs to use a new buffer without freeing the old
> + */
> + if (sb->len + extra + 1 > sb->alloc) {
> + size_t new_alloc = MAX(sb->len + extra + 1,
> alloc_nr(sb->alloc));
> + char *buf = xmalloc(new_alloc);
> + memcpy(buf, sb->buf, sb->alloc);
> + sb->buf = buf;
> + sb->alloc = new_alloc;
> + sb->flags |= STRBUF_OWNS_MEMORY;
> + }
> + }
> +
> if (new_buf)
> sb->buf[0] = '\0';
> }
> diff --git a/strbuf.h b/strbuf.h
> index 7987405..634759c 100644
> --- a/strbuf.h
> +++ b/strbuf.h
> @@ -11,11 +11,16 @@
> * A strbuf is NUL terminated for convenience, but no function in the
> * strbuf API actually relies on the string being free of NULs.
> *
> + * You can avoid the malloc/free overhead of `strbuf_init()`, `strbuf_add()`
> and
> + * `strbuf_release()` by wrapping pre-allocated memory (stack-allocated for
> + * example) using `strbuf_wrap_preallocated()` or `strbuf_wrap_fixed()`.
> + *
> * strbufs have some invariants that are very important to keep in mind:
> *
> * - The `buf` member is never NULL, so it can be used in any usual C
> * string operations safely. strbuf's _have_ to be initialized either by
> - * `strbuf_init()` or by `= STRBUF_INIT` before the invariants, though.
> + * `strbuf_init()`, `= STRBUF_INIT`, `strbuf_wrap_preallocated()` or
> + * `strbuf_wrap_fixed()` before the invariants, though.
> *
> * Do *not* assume anything on what `buf` really is (e.g. if it is
> * allocated memory or not), use `strbuf_detach()` to unwrap a memory
> @@ -62,13 +67,14 @@
> * access to the string itself.
> */
> struct strbuf {
> + unsigned int flags;
> size_t alloc;
> size_t len;
> char *buf;
> };
>
> extern char strbuf_slopbuf[];
> -#define STRBUF_INIT { 0, 0, strbuf_slopbuf }
> +#define STRBUF_INIT { 0, 0, 0, strbuf_slopbuf }
If I am not mistaken, to preserve the existing behavior the initial flags
should be 1 (own memory).
BTW this demonstrates that it may not be a good idea to declare the
"flags" field globally but then make the actual flags private.
Also: similar use cases in Git used :1 flags (see e.g. the "configured"
field in credential.h).
Ciao,
Johannes
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html