Hi Aaron,
On Tue, 2025-07-15 at 00:25 -0400, Aaron Merey wrote:
> Currently each archive descriptor maintains a single Elf_Arhdr for the
> current archive member (as determined by elf_next or elf_rand) which is
> returned by elf_getarhdr.
>
> A single per-archive Elf_Arhdr is not ideal since elf_next and elf_rand
> can invalidate an archive member's reference to its own Elf_Arhdr.
>
> Avoid this possible invalidation by storing each Elf_Arhdr in its
> archive member descriptor.
>
> The existing Elf_Arhdr parsing and storage in the archive descriptor
> internal state is left mostly untouched. When an archive member is
> created with elf_begin it is given its own copy of its Elf_Arhdr from
> the archive descriptor.
>
> Also update tests/arextract.c with verification that each Elf_Arhdr
> is distinct and remains valid after calls to elf_next that would
> have previously invalidated the Elf_Arhdr.
Nice. I like the design, but some questions below.
> diff --git a/libelf/elf_begin.c b/libelf/elf_begin.c
> index 3ed1f8d7..5ed5aaa3 100644
> --- a/libelf/elf_begin.c
> +++ b/libelf/elf_begin.c
> @@ -1065,11 +1065,14 @@ dup_elf (int fildes, Elf_Cmd cmd, Elf *ref)
> result = read_file (fildes, ref->state.ar.offset + sizeof (struct ar_hdr),
> ref->state.ar.elf_ar_hdr.ar_size, cmd, ref);
>
> - /* Enlist this new descriptor in the list of children. */
> if (result != NULL)
> {
> + /* Enlist this new descriptor in the list of children. */
> result->next = ref->state.ar.children;
> ref->state.ar.children = result;
> +
> + /* Ensure the member descriptor has its own copy of its header info.
> */
> + result->elf_ar_hdr = ref->state.ar.elf_ar_hdr;
> }
>
> return result;
Shouldn't this be result->elf_ar_hdr = ref->elf_ar_hdr.
See also below, the main question is if state.ar should still have an
elf_ar_hdr field itself.
> diff --git a/libelf/elf_clone.c b/libelf/elf_clone.c
> index e6fe4729..d6c8d541 100644
> --- a/libelf/elf_clone.c
> +++ b/libelf/elf_clone.c
> @@ -69,6 +69,7 @@ elf_clone (Elf *elf, Elf_Cmd cmd)
> == offsetof (struct Elf, state.elf64.scns));
> retval->state.elf.scns_last = &retval->state.elf32.scns;
> retval->state.elf32.scns.max = elf->state.elf32.scns.max;
> + retval->elf_ar_hdr = elf->elf_ar_hdr;
>
> retval->class = elf->class;
> }
Right, this is what I was expecting in elfdup above too.
> diff --git a/libelf/elf_getarhdr.c b/libelf/elf_getarhdr.c
> index 509f1da5..ec85fa71 100644
> --- a/libelf/elf_getarhdr.c
> +++ b/libelf/elf_getarhdr.c
> @@ -44,30 +44,12 @@ elf_getarhdr (Elf *elf)
> if (elf == NULL)
> return NULL;
>
> - Elf *parent = elf->parent;
> -
> /* Calling this function is not ok for any file type but archives. */
> - if (parent == NULL)
> + if (elf->parent == NULL)
> {
> __libelf_seterrno (ELF_E_INVALID_OP);
> return NULL;
> }
Should this be
if (elf->parent == NULL || elf->parent->kind != ELF_K_AR)
Below we do have an assert, so maybe this suggestion is wrong.
> - /* Make sure we have read the archive header. */
> - if (parent->state.ar.elf_ar_hdr.ar_name == NULL
> - && __libelf_next_arhdr_wrlock (parent) != 0)
> - {
> - rwlock_wrlock (parent->lock);
> - int st = __libelf_next_arhdr_wrlock (parent);
> - rwlock_unlock (parent->lock);
> -
> - if (st != 0)
> - /* Something went wrong. Maybe there is no member left. */
> - return NULL;
> - }
> -
> - /* We can be sure the parent is an archive. */
> - assert (parent->kind == ELF_K_AR);
Was this assert correct? Can an Elf only have an parent if it comes
from an ELF_K_AR file?
> - return &parent->state.ar.elf_ar_hdr;
> + return &elf->elf_ar_hdr;
> }
> diff --git a/libelf/libelfP.h b/libelf/libelfP.h
> index 66e7e4dd..20120ad3 100644
> --- a/libelf/libelfP.h
> +++ b/libelf/libelfP.h
> @@ -306,6 +306,9 @@ struct Elf
> /* Reference counting for the descriptor. */
> int ref_count;
>
> + /* Per-descriptor copy of the structure returned by 'elf_getarhdr'. */
> + Elf_Arhdr elf_ar_hdr;
> +
> /* Lock to handle multithreaded programs. */
> rwlock_define (,lock);
So this adds an Elf_Arhdr to all Elfs.
Three questions:
- Do you still need an elf_ar_hdr field in the state union for ar?
- Should this go into the the elf and/or elf32/64 state union
structs instead?
- What about the state.ar.ar_name and state.ar_rawname fields?
Those are pointed to from the elf_ar_hdr fields ar_name and raw_name.
So shouldn't they also be held as top-level or elf[32|64] field
states?
> diff --git a/tests/arextract.c b/tests/arextract.c
> index 936d7f55..7920d1c9 100644
> --- a/tests/arextract.c
> +++ b/tests/arextract.c
> @@ -21,12 +21,20 @@
>
> #include <fcntl.h>
> #include <gelf.h>
> +#include <limits.h>
> #include <stdio.h>
> #include <stdlib.h>
> #include <string.h>
> #include <unistd.h>
> #include <system.h>
>
> +typedef struct hdr_node {
> + Elf *elf;
> + Elf_Arhdr *hdr;
> + struct hdr_node *next;
> +} hdr_node;
> +
> +hdr_node *hdr_list = NULL;
>
> int
> main (int argc, char *argv[])
> @@ -80,6 +88,27 @@ main (int argc, char *argv[])
> exit (1);
> }
>
> + /* Keep a list of subelfs and their Elf_Arhdr. This is used to
> + verifiy that each archive member descriptor stores its own
> + Elf_Ahdr as opposed to the archive descriptor storing one
> + Elf_Ahdr at a time for all archive members. */
> + hdr_node *node = calloc (1, sizeof (hdr_node));
> + if (node == NULL)
> + {
> + printf ("calloc failed: %s\n", strerror (errno));
> + exit (1);
> + }
> + node->elf = subelf;
> + node->hdr = arhdr;
> +
> + if (hdr_list != NULL)
> + {
> + node->next = hdr_list;
> + hdr_list = node;
> + }
> + else
> + hdr_list = node;
> +
> if (strcmp (arhdr->ar_name, argv[2]) == 0)
> {
> int outfd;
> @@ -128,8 +157,37 @@ Failed to get base address for the archive element:
> %s\n",
> exit (1);
> }
>
> - /* Close the descriptors. */
> - if (elf_end (subelf) != 0 || elf_end (elf) != 0)
> + /* Close each subelf descriptor. */
> + hdr_node *cur;
> + while ((cur = hdr_list) != NULL)
> + {
> + /* Read arhdr names to help detect if there's a problem with the
> + per-member Elf_Arhdr storage. */
> + if (memchr (cur->hdr->ar_name, '\0', PATH_MAX) == NULL)
> + {
> + puts ("ar_name missing null character");
> + exit (1);
> + }
> +
> + if (memchr (cur->hdr->ar_rawname, '\0', PATH_MAX) == NULL)
> + {
> + puts ("ar_rawname missing null character");
> + exit (1);
> + }
I wonder if there is some way to check if the ar_name and/or ar_rawname
are unique here?
> + if (elf_end (cur->elf) != 0)
> + {
> + printf ("Error while freeing subELF descriptor: %s\n",
> + elf_errmsg (-1));
> + exit (1);
> + }
> +
> + hdr_list = cur->next;
> + free (cur);
> + }
> +
> + /* Close the archive descriptor. */
> + if (elf_end (elf) != 0)
> {
> printf ("Freeing ELF descriptors failed: %s", elf_errmsg (-1));
> exit (1);
>
> @@ -144,12 +202,6 @@ Failed to get base address for the archive element:
> %s\n",
>
> /* Get next archive element. */
> cmd = elf_next (subelf);
> - if (elf_end (subelf) != 0)
> - {
> - printf ("error while freeing sub-ELF descriptor: %s\n",
> - elf_errmsg (-1));
> - exit (1);
> - }
> }
>
> /* When we reach this point we haven't found the given file in the
Nice test.
Cheers,
Mark