Re: [PATCH v3 2/2] aarch64: Use mmap to add PROT_BTI instead of mprotect [BZ #26831]

2020-12-10 Thread Adhemerval Zanella



On 02/12/2020 05:55, Szabolcs Nagy via Libc-alpha wrote:
> Re-mmap executable segments if possible instead of using mprotect
> to add PROT_BTI. This allows using BTI protection with security
> policies that prevent mprotect with PROT_EXEC.
> 
> If the fd of the ELF module is not available because it was kernel
> mapped then mprotect is used and failures are ignored.  To protect
> the main executable even when mprotect is filtered the linux kernel
>  will have to be changed to add PROT_BTI to it.
> 
> The delayed failure reporting is mainly needed because currently
> _dl_process_gnu_properties does not propagate failures such that
> the required cleanups happen. Using the link_map_machine struct for
> error propagation is not ideal, but this seemed to be the least
> intrusive solution.
> 
> Fixes bug 26831.

LGTM, thanks.

Reviewed-by: Adhemerval Zanella  

> ---
> v3:
> - split the last patch in two.
> - pushed to nsz/btifix-v3 branch.
> 
>  sysdeps/aarch64/dl-bti.c  | 54 ++-
>  sysdeps/aarch64/dl-prop.h |  8 +-
>  sysdeps/aarch64/linkmap.h |  2 +-
>  3 files changed, 44 insertions(+), 20 deletions(-)
> 
> diff --git a/sysdeps/aarch64/dl-bti.c b/sysdeps/aarch64/dl-bti.c
> index 67d63c8a73..ff26c98ccf 100644
> --- a/sysdeps/aarch64/dl-bti.c
> +++ b/sysdeps/aarch64/dl-bti.c
> @@ -19,9 +19,17 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
> -static void
> -enable_bti (struct link_map *map, const char *program)
> +/* See elf/dl-load.h.  */
> +#ifndef MAP_COPY
> +# define MAP_COPY (MAP_PRIVATE | MAP_DENYWRITE)
> +#endif
> +
> +/* Enable BTI protection for MAP.  */
> +
> +void
> +_dl_bti_protect (struct link_map *map, int fd)
>  {
>const size_t pagesz = GLRO(dl_pagesize);
>const ElfW(Phdr) *phdr;
> @@ -41,19 +49,31 @@ enable_bti (struct link_map *map, const char *program)
>   if (phdr->p_flags & PF_W)
> prot |= PROT_WRITE;
>  
> - if (__mprotect (start, len, prot) < 0)
> -   {
> - if (program)
> -   _dl_fatal_printf ("%s: mprotect failed to turn on BTI\n",
> - map->l_name);
> - else
> -   _dl_signal_error (errno, map->l_name, "dlopen",
> - N_("mprotect failed to turn on BTI"));
> -   }
> + if (fd == -1)
> +   /* Ignore failures for kernel mapped binaries.  */
> +   __mprotect (start, len, prot);
> + else
> +   map->l_mach.bti_fail = __mmap (start, len, prot,
> +  MAP_FIXED|MAP_COPY|MAP_FILE,
> +  fd, off) == MAP_FAILED;
>}
>  }
>  

OK. I am not very found of ignore the mprotect failure here, but it 
has been discussed in multiple threads that we need kernel support 
to proper handle it.

> -/* Enable BTI for MAP and its dependencies.  */
> +
> +static void
> +bti_failed (struct link_map *l, const char *program)
> +{
> +  if (program)

No implicit checks.

> +_dl_fatal_printf ("%s: %s: failed to turn on BTI protection\n",
> +   program, l->l_name);
> +  else
> +/* Note: the errno value is not available any more.  */
> +_dl_signal_error (0, l->l_name, "dlopen",
> +   N_("failed to turn on BTI protection"));
> +}
> +
> +
> +/* Report BTI protection failures for MAP and its dependencies.  */
>  

Ok.

>  void
>  _dl_bti_check (struct link_map *map, const char *program)
> @@ -61,16 +81,14 @@ _dl_bti_check (struct link_map *map, const char *program)
>if (!GLRO(dl_aarch64_cpu_features).bti)
>  return;
>  
> -  if (map->l_mach.bti)
> -enable_bti (map, program);
> +  if (map->l_mach.bti_fail)
> +bti_failed (map, program);
>  
>unsigned int i = map->l_searchlist.r_nlist;
>while (i-- > 0)
>  {
>struct link_map *l = map->l_initfini[i];
> -  if (l->l_init_called)
> - continue;
> -  if (l->l_mach.bti)
> - enable_bti (l, program);
> +  if (l->l_mach.bti_fail)
> + bti_failed (l, program);
>  }
>  }

Ok.

> diff --git a/sysdeps/aarch64/dl-prop.h b/sysdeps/aarch64/dl-prop.h
> index 2016d1472e..e926e54984 100644
> --- a/sysdeps/aarch64/dl-prop.h
> +++ b/sysdeps/aarch64/dl-prop.h
> @@ -19,6 +19,8 @@
>  #ifndef _DL_PROP_H
>  #define _DL_PROP_H
>  
> +extern void _dl_bti_protect (struct link_map *, int) attribute_hidden;
> +
>  extern void _dl_bti_check (struct link_map *, const char *)
>  attribute_hidden;
>  
> @@ -43,6 +45,10 @@ static inline int
>  _dl_proce

Re: [PATCH v3 1/2] aarch64: align address for BTI protection [BZ #26988]

2020-12-10 Thread Adhemerval Zanella



On 02/12/2020 05:55, Szabolcs Nagy via Libc-alpha wrote:
> Handle unaligned executable load segments (the bfd linker is not
> expected to produce such binaries, but other linkers may).
> 
> Computing the mapping bounds follows _dl_map_object_from_fd more
> closely now.
> 
> Fixes bug 26988.

LGTM, thanks.

Reviewed-by: Adhemerval Zanella  

> ---
> v3:
> - split the last patch in two so this bug is fixed separately.
> - pushed to nsz/btifix-v3 branch.
> 
>  sysdeps/aarch64/dl-bti.c | 14 --
>  1 file changed, 8 insertions(+), 6 deletions(-)
> 
> diff --git a/sysdeps/aarch64/dl-bti.c b/sysdeps/aarch64/dl-bti.c
> index 8f4728adce..67d63c8a73 100644
> --- a/sysdeps/aarch64/dl-bti.c
> +++ b/sysdeps/aarch64/dl-bti.c
> @@ -20,19 +20,22 @@
>  #include 
>  #include 
>  
> -static int
> +static void
>  enable_bti (struct link_map *map, const char *program)
>  {

Ok.

> +  const size_t pagesz = GLRO(dl_pagesize);
>const ElfW(Phdr) *phdr;
> -  unsigned prot;
>  
>for (phdr = map->l_phdr; phdr < >l_phdr[map->l_phnum]; ++phdr)
>  if (phdr->p_type == PT_LOAD && (phdr->p_flags & PF_X))
>{
> - void *start = (void *) (phdr->p_vaddr + map->l_addr);
> - size_t len = phdr->p_memsz;
> + size_t vstart = ALIGN_DOWN (phdr->p_vaddr, pagesz);
> + size_t vend = ALIGN_UP (phdr->p_vaddr + phdr->p_filesz, pagesz);
> + off_t off = ALIGN_DOWN (phdr->p_offset, pagesz);
> + void *start = (void *) (vstart + map->l_addr);
> + size_t len = vend - vstart;
>  
> - prot = PROT_EXEC | PROT_BTI;
> + unsigned prot = PROT_EXEC | PROT_BTI;
>   if (phdr->p_flags & PF_R)
> prot |= PROT_READ;
>   if (phdr->p_flags & PF_W)
> @@ -48,7 +51,6 @@ enable_bti (struct link_map *map, const char *program)
>   N_("mprotect failed to turn on BTI"));
> }
>}
> -  return 0;
>  }
>  
>  /* Enable BTI for MAP and its dependencies.  */
> 

Ok.


Re: [PATCH v2 5/6] elf: Pass the fd to note processing

2020-12-10 Thread Adhemerval Zanella



On 27/11/2020 10:21, Szabolcs Nagy via Libc-alpha wrote:
> To handle GNU property notes on aarch64 some segments need to
> be mmaped again, so the fd of the loaded ELF module is needed.
> 
> When the fd is not available (kernel loaded modules), then -1
> is passed.
> 
> The fd is passed to both _dl_process_pt_gnu_property and
> _dl_process_pt_note for consistency. Target specific note
> processing functions are updated accordingly.

LGTM, thanks.

Reviewed-by: Adhemerval Zanella  

> ---
>  elf/dl-load.c  | 12 +++-
>  elf/rtld.c |  4 ++--
>  sysdeps/aarch64/dl-prop.h  |  6 +++---
>  sysdeps/generic/dl-prop.h  |  6 +++---
>  sysdeps/generic/ldsodefs.h |  5 +++--
>  sysdeps/x86/dl-prop.h  |  6 +++---
>  6 files changed, 21 insertions(+), 18 deletions(-)
> 
> diff --git a/elf/dl-load.c b/elf/dl-load.c
> index b0d65f32cc..74039f22a6 100644
> --- a/elf/dl-load.c
> +++ b/elf/dl-load.c
> @@ -837,10 +837,12 @@ _dl_init_paths (const char *llp, const char *source)
>  
>  /* Process PT_GNU_PROPERTY program header PH in module L after
> PT_LOAD segments are mapped.  Only one NT_GNU_PROPERTY_TYPE_0
> -   note is handled which contains processor specific properties.  */
> +   note is handled which contains processor specific properties.
> +   FD is -1 for the kernel mapped main executable otherwise it is
> +   the fd used for loading module L.  */
>  
>  void
> -_dl_process_pt_gnu_property (struct link_map *l, const ElfW(Phdr) *ph)
> +_dl_process_pt_gnu_property (struct link_map *l, int fd, const ElfW(Phdr) 
> *ph)
>  {
>const ElfW(Nhdr) *note = (const void *) (ph->p_vaddr + l->l_addr);
>const ElfW(Addr) size = ph->p_memsz;

Ok.

> @@ -887,7 +889,7 @@ _dl_process_pt_gnu_property (struct link_map *l, const 
> ElfW(Phdr) *ph)
> last_type = type;
>  
> /* Target specific property processing.  */
> -   if (_dl_process_gnu_property (l, type, datasz, ptr) == 0)
> +   if (_dl_process_gnu_property (l, fd, type, datasz, ptr) == 0)
>   return;
>  
> /* Check the next property item.  */

Ok.

> @@ -1379,10 +1381,10 @@ cannot enable executable stack as shared object 
> requires");
>  switch (ph[-1].p_type)
>{
>case PT_NOTE:
> - _dl_process_pt_note (l, [-1]);
> + _dl_process_pt_note (l, fd, [-1]);
>   break;
>case PT_GNU_PROPERTY:
> - _dl_process_pt_gnu_property (l, [-1]);
> + _dl_process_pt_gnu_property (l, fd, [-1]);
>   break;
>}
>  

Ok.

> diff --git a/elf/rtld.c b/elf/rtld.c
> index c4ffc8d4b7..ec62567580 100644
> --- a/elf/rtld.c
> +++ b/elf/rtld.c
> @@ -1540,10 +1540,10 @@ dl_main (const ElfW(Phdr) *phdr,
>  switch (ph[-1].p_type)
>{
>case PT_NOTE:
> - _dl_process_pt_note (main_map, [-1]);
> + _dl_process_pt_note (main_map, -1, [-1]);
>   break;
>case PT_GNU_PROPERTY:
> - _dl_process_pt_gnu_property (main_map, [-1]);
> + _dl_process_pt_gnu_property (main_map, -1, [-1]);
>   break;
>}
>  

Ok.

> diff --git a/sysdeps/aarch64/dl-prop.h b/sysdeps/aarch64/dl-prop.h
> index b0785bda83..2016d1472e 100644
> --- a/sysdeps/aarch64/dl-prop.h
> +++ b/sysdeps/aarch64/dl-prop.h
> @@ -35,13 +35,13 @@ _dl_open_check (struct link_map *m)
>  }
>  
>  static inline void __attribute__ ((always_inline))
> -_dl_process_pt_note (struct link_map *l, const ElfW(Phdr) *ph)
> +_dl_process_pt_note (struct link_map *l, int fd, const ElfW(Phdr) *ph)
>  {
>  }
>  
>  static inline int
> -_dl_process_gnu_property (struct link_map *l, uint32_t type, uint32_t datasz,
> -   void *data)
> +_dl_process_gnu_property (struct link_map *l, int fd, uint32_t type,
> +   uint32_t datasz, void *data)
>  {
>if (type == GNU_PROPERTY_AARCH64_FEATURE_1_AND)
>  {

Ok.

> diff --git a/sysdeps/generic/dl-prop.h b/sysdeps/generic/dl-prop.h
> index f1cf576fe3..df27ff8e6a 100644
> --- a/sysdeps/generic/dl-prop.h
> +++ b/sysdeps/generic/dl-prop.h
> @@ -37,15 +37,15 @@ _dl_open_check (struct link_map *m)
>  }
>  
>  static inline void __attribute__ ((always_inline))
> -_dl_process_pt_note (struct link_map *l, const ElfW(Phdr) *ph)
> +_dl_process_pt_note (struct link_map *l, int fd, const ElfW(Phdr) *ph)
>  {
>  }
>  
>  /* Called for each property in the NT_GNU_PROPERTY_TYPE_0 note of L,
> processing of the properties continues until this returns 0.  */
>  static inline int __attribute__ ((always_inline))
> -_dl_process_gnu_property (struct link_map *l, uint32_t type, uint32_t datasz,
> -   void *data)
&g

Re: [PATCH v2 3/6] elf: Fix failure handling in _dl_map_object_from_fd

2020-12-10 Thread Adhemerval Zanella



On 27/11/2020 10:20, Szabolcs Nagy via Libc-alpha wrote:
> There are many failure paths that call lose to do local cleanups
> in _dl_map_object_from_fd, but it did not clean everything.
> 
> Handle l_phdr, l_libname and mapped segments in the common failure
> handling code.
> 
> There are various bits that may not be cleaned properly on failure
> (e.g. executable stack, tlsid, incomplete dl_map_segments).
> ---
>  elf/dl-load.c | 24 +++-
>  1 file changed, 15 insertions(+), 9 deletions(-)
> 
> diff --git a/elf/dl-load.c b/elf/dl-load.c
> index 21e55deb19..9c71b7562c 100644
> --- a/elf/dl-load.c
> +++ b/elf/dl-load.c
> @@ -914,8 +914,15 @@ lose (int code, int fd, const char *name, char 
> *realname, struct link_map *l,
>/* The file might already be closed.  */
>if (fd != -1)
>  (void) __close_nocancel (fd);
> +  if (l != NULL && l->l_map_start != 0)
> +_dl_unmap_segments (l);
>if (l != NULL && l->l_origin != (char *) -1l)
>  free ((char *) l->l_origin);
> +  if (l != NULL && !l->l_libname->dont_free)
> +free (l->l_libname);
> +  if (l != NULL && l->l_phdr_allocated)
> +free ((void *) l->l_phdr);
> +
>free (l);
>free (realname);
>  
> @@ -1256,7 +1263,11 @@ _dl_map_object_from_fd (const char *name, const char 
> *origname, int fd,
>  errstring = _dl_map_segments (l, fd, header, type, loadcmds, nloadcmds,
> maplength, has_holes, loader);
>  if (__glibc_unlikely (errstring != NULL))
> -  goto call_lose;
> +  {
> + /* Mappings can be in an inconsistent state: avoid unmap.  */
> + l->l_map_start = l->l_map_end = 0;
> + goto call_lose;
> +  }
>  
>  /* Process program headers again after load segments are mapped in
> case processing requires accessing those segments.  Scan program

In this case I am failing to see who would be responsible to unmap 
l_map_start int the type == ET_DYN where first mmap succeeds but
with a later mmap failure in any load command.

> @@ -1294,14 +1305,6 @@ _dl_map_object_from_fd (const char *name, const char 
> *origname, int fd,
>|| (__glibc_unlikely (l->l_flags_1 & DF_1_PIE)
> && __glibc_unlikely ((mode & __RTLD_OPENEXEC) == 0)))
>  {
> -  /* We are not supposed to load this object.  Free all resources.  */
> -  _dl_unmap_segments (l);
> -
> -  if (!l->l_libname->dont_free)
> - free (l->l_libname);
> -
> -  if (l->l_phdr_allocated)
> - free ((void *) l->l_phdr);
>  
>if (l->l_flags_1 & DF_1_PIE)
>   errstring
> @@ -1392,6 +1395,9 @@ cannot enable executable stack as shared object 
> requires");
>/* Signal that we closed the file.  */
>fd = -1;
>  
> +  /* Failures before this point are handled locally via lose.
> + No more failures are allowed in this function until return.  */
> +
>/* If this is ET_EXEC, we should have loaded it as lt_executable.  */
>assert (type != ET_EXEC || l->l_type == lt_executable);
>  
> 

Ok.


Re: [PATCH v2 1/6] aarch64: Fix missing BTI protection from dependencies [BZ #26926]

2020-12-10 Thread Adhemerval Zanella



On 27/11/2020 10:19, Szabolcs Nagy via Libc-alpha wrote:
> The _dl_open_check and _rtld_main_check hooks are not called on the
> dependencies of a loaded module, so BTI protection was missed on
> every module other than the main executable and directly dlopened
> libraries.
> 
> The fix just iterates over dependencies to enable BTI.
> 
> Fixes bug 26926.

LGTM, modulus the argument name change.

I also think it would be better to add a testcase, for both DT_NEEDED
and dlopen case.

> ---
>  sysdeps/aarch64/dl-bti.c | 21 +
>  1 file changed, 17 insertions(+), 4 deletions(-)
> 
> diff --git a/sysdeps/aarch64/dl-bti.c b/sysdeps/aarch64/dl-bti.c
> index 196e462520..8f4728adce 100644
> --- a/sysdeps/aarch64/dl-bti.c
> +++ b/sysdeps/aarch64/dl-bti.c
> @@ -51,11 +51,24 @@ enable_bti (struct link_map *map, const char *program)
>return 0;
>  }
>  
> -/* Enable BTI for L if required.  */
> +/* Enable BTI for MAP and its dependencies.  */
>  
>  void
> -_dl_bti_check (struct link_map *l, const char *program)
> +_dl_bti_check (struct link_map *map, const char *program)

I don't see much gain changing the argument name.

>  {
> -  if (GLRO(dl_aarch64_cpu_features).bti && l->l_mach.bti)
> -enable_bti (l, program);
> +  if (!GLRO(dl_aarch64_cpu_features).bti)
> +return;
> +
> +  if (map->l_mach.bti)
> +enable_bti (map, program);
> +
> +  unsigned int i = map->l_searchlist.r_nlist;
> +  while (i-- > 0)
> +{
> +  struct link_map *l = map->l_initfini[i];
> +  if (l->l_init_called)
> + continue;
> +  if (l->l_mach.bti)
> + enable_bti (l, program);
> +}
>  }
> 

Ok.


Re: clock_gettime64 vdso bug on 32-bit arm, rpi-4

2020-05-19 Thread Adhemerval Zanella



On 19/05/2020 16:54, Arnd Bergmann wrote:
> Jack Schmidt reported a bug for the arm32 clock_gettimeofday64 vdso call last
> month: https://github.com/richfelker/musl-cross-make/issues/96 and
> https://github.com/raspberrypi/linux/issues/3579
> 
> As Will Deacon pointed out, this was never reported on the mailing list,
> so I'll try to summarize what we know, so this can hopefully be resolved soon.
> 
> - This happened reproducibly on Linux-5.6 on a 32-bit Raspberry Pi patched
>kernel running on a 64-bit Raspberry Pi 4b (bcm2711) when calling
>clock_gettime64(CLOCK_REALTIME)

Does it happen with other clocks as well?

> 
> - The kernel tree is at https://github.com/raspberrypi/linux/, but I could
>   see no relevant changes compared to a mainline kernel.

Is this bug reproducible with mainline kernel or mainline kernel can't be
booted on bcm2711?

> 
> - From the report, I see that the returned time value is larger than the
>   expected time, by 3.4 to 14.5 million seconds in four samples, my
>   guess is that a random number gets added in at some point.

What kind code are you using to reproduce it? It is threaded or issue
clock_gettime from signal handlers?

> 
> - From other sources, I found that the Raspberry Pi clocksource runs
>   at 54 MHz, with a mask value of 0xff. From these numbers
>   I would expect that reading a completely random hardware register
>   value would result in an offset up to 1.33 billion seconds, which is
>   around factor 100 more than the error we see, though similar.
> 
> - The test case calls the musl clock_gettime() function, which falls back to
>   the clock_gettime64() syscall on kernels prior to 5.5, or to the 32-bit
>   clock_gettime() prior to Linux-5.1. As reported in the bug, Linux-4.19 does
>   not show the bug.
> 
> - The behavior was not reproduced on the same user space in qemu,
>   though I cannot tell whether the exact same kernel binary was used.
> 
> - glibc-2.31 calls the same clock_gettime64() vdso function on arm to
>   implement clock_gettime(), but earlier versions did not. I have not
>   seen any reports of this bug, which could be explained by users
>   generally being on older versions.
> 
> - As far as I can tell, there are no reports of this bug from other users,
>   and so far nobody could reproduce it.
> 
> - The current musl git tree has been patched to not call clock_gettime64
>on ARM because of this problem, so it cannot be used for reproducing it.

So should glibc follow musl and remove arm clock_gettime6y4 vDSO support
or this bug is localized to an specific kernel version running on an
specific hardware?

> 
> If anyone has other information that may help figure out what is going
> on, please share.
> 
> Arnd
> 


Re: d_off field in struct dirent and 32-on-64 emulation

2019-01-02 Thread Adhemerval Zanella



On 31/12/2018 15:03, Joseph Myers wrote:
> On Fri, 28 Dec 2018, Adhemerval Zanella wrote:
> 
>>>> Currently we only have nios2 and csky (unfortunately).  But since generic 
>>>> definition for off_t and off64_t still assumes non-LFS support, all new
>>>> 32-bits ports potentially might carry the issue.
>>>
>>> For csky, we could still change the type of the non-standard d_off
>>> field to long long int.  This way, only telldir would have to fail
>>> when truncation is necessary, as mentioned below:
>>
>> I think it makes no sense to continue making non-LFS as default for
>> newer 32 bits ports, the support will be emulated with LFS syscalls.
> 
> Any new 32-bit port that uses 64-bit time_t will also use 64-bit offsets 
> (because we don't have any glibc configurations that support the 
> combination of 64-bit time with 32-bit offsets, and don't want to add 
> them).  That should apply for RISC-V 32-bit at least.
> 
> I've filed <https://sourceware.org/bugzilla/show_bug.cgi?id=24050> for 
> missing overflow checks in telldir when the default off_t is wider than 
> long int (currently just applies to x32; not sure why we don't see glibc 
> test failures on x32 resulting from the quiet truncation, as the issue is 
> certainly there in the source code).
> 

What about csky? Should we still make it use 32-bit offsets as default
configuration even when kernel does not support it natively?


Re: d_off field in struct dirent and 32-on-64 emulation

2018-12-28 Thread Adhemerval Zanella



On 28/12/2018 10:01, Florian Weimer wrote:
> * Florian Weimer:
> 
>> * Adhemerval Zanella:
>>
>>> On 27/12/2018 16:09, Florian Weimer wrote:
>>>> * Adhemerval Zanella:
>>>>
>>>>> Also for glibc standpoint, although reverting it back to use getdents 
>>>>> syscall for non-LFS mode might fix this issue for architectures that
>>>>> provides non-LFS getdents syscall it won't be a fix for architectures 
>>>>> that still provides off_t different than off64_t *and* only provides 
>>>>> getdents64 syscall.
>>>>>
>>>>> Currently we only have nios2 and csky (unfortunately).  But since generic 
>>>>> definition for off_t and off64_t still assumes non-LFS support, all new
>>>>> 32-bits ports potentially might carry the issue.
>>>>
>>>> For csky, we could still change the type of the non-standard d_off
>>>> field to long long int.  This way, only telldir would have to fail
>>>> when truncation is necessary, as mentioned below:
>>>
>>> I think it makes no sense to continue making non-LFS as default for
>>> newer 32 bits ports, the support will be emulated with LFS syscalls.
>>
>> Sorry, I don't see how this matters.  seekdir and telldir are NOT
>> affected by LFS.
> 
> Ah, right.  If struct dirent is 64-bit only, then the d_off member
> will be 64 bits as well.  But it is unclear whether you can use that
> with lseek (probably yes, in its 64-bit variant), and it's unlikely
> it's going to work with seekdir because of the POSIX-required long int
> type.
> 

I was referring to all other API that uses off_t as well (pread for
instance), where new ports will have non-LFS variants that will call
only LFS variants.


Re: d_off field in struct dirent and 32-on-64 emulation

2018-12-28 Thread Adhemerval Zanella



On 27/12/2018 16:09, Florian Weimer wrote:
> * Adhemerval Zanella:
> 
>> Also for glibc standpoint, although reverting it back to use getdents 
>> syscall for non-LFS mode might fix this issue for architectures that
>> provides non-LFS getdents syscall it won't be a fix for architectures 
>> that still provides off_t different than off64_t *and* only provides 
>> getdents64 syscall.
>>
>> Currently we only have nios2 and csky (unfortunately).  But since generic 
>> definition for off_t and off64_t still assumes non-LFS support, all new
>> 32-bits ports potentially might carry the issue.
> 
> For csky, we could still change the type of the non-standard d_off
> field to long long int.  This way, only telldir would have to fail
> when truncation is necessary, as mentioned below:

I think it makes no sense to continue making non-LFS as default for
newer 32 bits ports, the support will be emulated with LFS syscalls.



Re: d_off field in struct dirent and 32-on-64 emulation

2018-12-27 Thread Adhemerval Zanella



On 27/12/2018 15:18, Florian Weimer wrote:
> We have a bit of an interesting problem with respect to the d_off
> field in struct dirent.
> 
> When running a 64-bit kernel on certain file systems, notably ext4,
> this field uses the full 63 bits even for small directories (strace -v
> output, wrapped here for readability):
> 
> getdents(3, [
>   {d_ino=1494304, d_off=3901177228673045825, d_reclen=40, 
> d_name="authorized_keys", d_type=DT_REG},
>   {d_ino=1494277, d_off=7491915799041650922, d_reclen=24, d_name=".", 
> d_type=DT_DIR},
>   {d_ino=1314655, d_off=9223372036854775807, d_reclen=24, d_name="..", 
> d_type=DT_DIR}
> ], 32768) = 88
> 
> When running in 32-bit compat mode, this value is somehow truncated to
> 31 bits, for both the getdents and the getdents64 (!) system call (at
> least on i386).
> 
> In an effort to simplify support for future architectures which only
> have the getdents64 system call, we changed glibc 2.28 to use the
> getdents64 system call unconditionally, and perform translation if
> necessary.  This translation is noteworthy because it includes
> overflow checking for the d_ino and d_off members of struct dirent.
> We did not initially observe a regression because the kernel performs
> consistent d_off truncation (with the ext4 file system; small
> directories do not show this issue on XFS), so the overflow check does
> not fire.
> 
> However, both qemu-user and the 9p file system can run in such a way
> that the kernel is entered from a 64-bit process, but the actual usage
> is from a 32-bit process:
> 
>   
> 
> I think diagrammatically, this looks like this:
> 
>   guest process  (32-bit)
> | getdents64, 32-bit UAPI
>   qemu-user (64-bit)
> | getdents, 64-bit UAPI
>   host kernel (64-bit)
> 
> Or:
> 
>   guest process 
> | getdents64, 32-bit UAPI
>   guest kernel (64-bit)
> | 9p over virtio (64-bit d_off in struct p9_dirent)
>   qemu
> | getdents, 64-bit UAPI
>   host kernel (64-bit)
> 
> Back when we still called getdents, in the first case, the 32-bit
> getdents system call emulation in a 64-bit qemu-user process would
> just silently truncate the d_off field as part of the translation, not
> reporting an error.  The second case is more complicated, and I have
> not figured out where the truncation happens.
> 
> This truncation has always been a bug; it breaks telldir/seekdir at
> least in some cases.  But use of telldir/seekdir is comparatively
> rare.  In contrast, now that we detect d_off overflow in glibc,
> readdir will always fail in the sketched configurations, which is bad.
> (glibc exposes the d_off field to applications, and it cannot know
> whether the application will use it or not, so there is no direct way
> to restrict the overflow error to the telldir/seekdir use case.)
> 
> We could switch glibc to call getdents again if the system call is
> available.  But that merely relies on the existence of the truncation
> bug somewhere else in the file system stack.  This is why I don't
> think it's the right solution, just the path of least resistance.
> 
> I don't want to reimplement the ext4 truncation behavior in glibc (it
> doesn't look like a straightforward truncation), and it wouldn't work
> for the second scenario where we see the 9p file system in the 32-bit
> glibc, not the ext4 file system.  So that's not a good solution.

Also for glibc standpoint, although reverting it back to use getdents 
syscall for non-LFS mode might fix this issue for architectures that
provides non-LFS getdents syscall it won't be a fix for architectures 
that still provides off_t different than off64_t *and* only provides 
getdents64 syscall.

Currently we only have nios2 and csky (unfortunately).  But since generic 
definition for off_t and off64_t still assumes non-LFS support, all new
32-bits ports potentially might carry the issue.

> 
> There is another annoying aspect: The standards expose d_off through
> the telldir function, and that returns long int on all architectures
> (not off_t, so unchanged by _FILE_OFFSET_BITS).  That's mostly a
> userspace issue and thus needing different steps to resolve (possibly
> standards action).
> 
> Any suggestions how to solve this?  Why does the kernel return
> different d_off values for 32-bit and 64-bit processes even when using
> getdents64, for the same directory?
> 


Re: [PATCH 23/23] [AARCH64] Take utmp{,x}.h from s390 port

2016-06-28 Thread Adhemerval Zanella


On 28/06/2016 14:59, Yury Norov wrote:
> On Tue, Jun 28, 2016 at 05:18:04PM +, Joseph Myers wrote:
>> On Tue, 28 Jun 2016, Yury Norov wrote:
>>
>>> aarch64 and ilp32 has different size of time_t. So to have common
>>> layout for struct utmp and utmpx, corresponding headers are taken
>>> from s390 port.
>>
>> You can't #include installed headers from other ports like this; it 
>> wouldn't work when using an installed library.
>>
>> -- 
>> Joseph S. Myers
>> jos...@codesourcery.com
> 
> So I should copy? Hmm...
> 

In this case yes, since the reference will be broken because the s390
won't be installed.


Re: [PATCH 23/23] [AARCH64] Take utmp{,x}.h from s390 port

2016-06-28 Thread Adhemerval Zanella


On 28/06/2016 14:59, Yury Norov wrote:
> On Tue, Jun 28, 2016 at 05:18:04PM +, Joseph Myers wrote:
>> On Tue, 28 Jun 2016, Yury Norov wrote:
>>
>>> aarch64 and ilp32 has different size of time_t. So to have common
>>> layout for struct utmp and utmpx, corresponding headers are taken
>>> from s390 port.
>>
>> You can't #include installed headers from other ports like this; it 
>> wouldn't work when using an installed library.
>>
>> -- 
>> Joseph S. Myers
>> jos...@codesourcery.com
> 
> So I should copy? Hmm...
> 

In this case yes, since the reference will be broken because the s390
won't be installed.


Re: [PATCH 18/23] [AARCH64] ILP32: support stat syscall family

2016-06-28 Thread Adhemerval Zanella


On 28/06/2016 16:08, Yury Norov wrote:
> On Tue, Jun 28, 2016 at 05:15:13PM +, Joseph Myers wrote:
>>  still 
>> applies.  Unify implementations instead of proliferating variants.
> 
> I think on it. I don't see simple way to unify it right now. And I
> plan to take a vacation in next two weeks, so I'd like to share my 
> progress to community (mostly for kernel), as this series has some
> LTP tests fixed, and this is important for us.
> 
> What you talk about sounds unclear to me. If you mean to unify with
> one of existing ports, it looks unnecessary, as ilp32 will end up with 
> RISC-V anyway. If you mean to use RISC-V, it's not ready yet. I was
> thinking that when they will finish, they simply switch this port to
> their code. Am I too optimistic?

The idea is to avoid the proliferation of multiple implementation of
same function over multiple files.  This have the advantage to make
easy for new ports to add such functionality and simplify the code
base.  Take fstatfs{64} for instance:

$ find . -iname fstatfs*
./sysdeps/mach/hurd/fstatfs.c
./sysdeps/mach/hurd/fstatfs64.c
./sysdeps/unix/sysv/linux/generic/wordsize-32/fstatfs.c
./sysdeps/unix/sysv/linux/alpha/fstatfs64.c
./sysdeps/unix/sysv/linux/fstatfs64.c
./sysdeps/unix/sysv/linux/wordsize-64/fstatfs64.c
./sysdeps/unix/sysv/linux/mips/mips64/n64/fstatfs64.c
./io/fstatfs.c
./io/fstatfs64.c

The 'io' is the default one which is just a stub that return ENOSYS.
For Linux ideally we should aim to have just one implementation that
cover all the architectures/kernel limitation (the same idea I am
pushing with some consolidation patches).

It might be outside the scope of the port enablement, but it is usually
the opportunity to the refactor on such code.  And for such functions
it might require some work for some architecture idiosyncrasies (such
as alpha not providing fstat64), but I think it quite doable. 


> 
>> Also, much of the formatting is way off the GNU Coding Standards (e.g. 
>> indentation that's not two-column, "{" not on a line by itself), and 
>> you're missing descriptions as first lines of many new files.
> 
> Is there glibc analogue for kernel scripts/checkpatch.pl? If yes,
> please point me out, and I'll briefly fix all issues. If no please be
> patient to whitespace rules violations. I completely understand the
> importance of following the coding rules, but now I am little limited
> in time and prefer to fix real bugs first, and then read that document
> carefully and check all the mess I introduced.

Also keep in mind to remove the 'Contributed by ...' presented in some
files.

> 
> Yury
> 


Re: [PATCH 18/23] [AARCH64] ILP32: support stat syscall family

2016-06-28 Thread Adhemerval Zanella


On 28/06/2016 16:08, Yury Norov wrote:
> On Tue, Jun 28, 2016 at 05:15:13PM +, Joseph Myers wrote:
>>  still 
>> applies.  Unify implementations instead of proliferating variants.
> 
> I think on it. I don't see simple way to unify it right now. And I
> plan to take a vacation in next two weeks, so I'd like to share my 
> progress to community (mostly for kernel), as this series has some
> LTP tests fixed, and this is important for us.
> 
> What you talk about sounds unclear to me. If you mean to unify with
> one of existing ports, it looks unnecessary, as ilp32 will end up with 
> RISC-V anyway. If you mean to use RISC-V, it's not ready yet. I was
> thinking that when they will finish, they simply switch this port to
> their code. Am I too optimistic?

The idea is to avoid the proliferation of multiple implementation of
same function over multiple files.  This have the advantage to make
easy for new ports to add such functionality and simplify the code
base.  Take fstatfs{64} for instance:

$ find . -iname fstatfs*
./sysdeps/mach/hurd/fstatfs.c
./sysdeps/mach/hurd/fstatfs64.c
./sysdeps/unix/sysv/linux/generic/wordsize-32/fstatfs.c
./sysdeps/unix/sysv/linux/alpha/fstatfs64.c
./sysdeps/unix/sysv/linux/fstatfs64.c
./sysdeps/unix/sysv/linux/wordsize-64/fstatfs64.c
./sysdeps/unix/sysv/linux/mips/mips64/n64/fstatfs64.c
./io/fstatfs.c
./io/fstatfs64.c

The 'io' is the default one which is just a stub that return ENOSYS.
For Linux ideally we should aim to have just one implementation that
cover all the architectures/kernel limitation (the same idea I am
pushing with some consolidation patches).

It might be outside the scope of the port enablement, but it is usually
the opportunity to the refactor on such code.  And for such functions
it might require some work for some architecture idiosyncrasies (such
as alpha not providing fstat64), but I think it quite doable. 


> 
>> Also, much of the formatting is way off the GNU Coding Standards (e.g. 
>> indentation that's not two-column, "{" not on a line by itself), and 
>> you're missing descriptions as first lines of many new files.
> 
> Is there glibc analogue for kernel scripts/checkpatch.pl? If yes,
> please point me out, and I'll briefly fix all issues. If no please be
> patient to whitespace rules violations. I completely understand the
> importance of following the coding rules, but now I am little limited
> in time and prefer to fix real bugs first, and then read that document
> carefully and check all the mess I introduced.

Also keep in mind to remove the 'Contributed by ...' presented in some
files.

> 
> Yury
> 


Re: [RFC2 PATCH 00/23] ARM64: support ILP32

2016-06-28 Thread Adhemerval Zanella
Hi Yury,

Please address all previous reviews comment and concerns before send
a new RFC/patch set.  As Joseph pointed out, he raised different question
in different patch iterations (some even back from 2014) and there is
no point is just resent the same patch without first review such
comments.

On 28/06/2016 13:39, Yury Norov wrote:
> This series enables aarch64 port with ilp32 mode.
> 
> ABI details:
>  - types are taken from AARCH32, next types turned to 64-bit,
>as modern requirement for new APIs tells:
>   ino_t  is  u64 type
>   off_t  is  s64 type
>   blkcnt_t   is  s64 type
>   fsblkcnt_t is  u64 type
>   fsfilcnt_t is  u64 type
>  - 64-bit arguments are passed in syscall as register pair,
>as kernel internally clears top halves for all input regs;
>  - standard syscall table is used;
>  - 32-bit time_t is used. AARCH64/ILP32 is waiting for general
>fix of Y2038 problem just like other 32-bit arches;
>  - stat{64}, statfs{64} structures are of the identical layout
>with LP64. Corresponding syscalls are taken from 64-bit code
>  - struct utmp, utmpx layouts are the same.
> 
> v1: https://sourceware.org/ml/libc-alpha/2016-06/msg00730.html
> v2:
>  - rebased on master;
>  - dropped unneeded/unrelated patches;
>  - pread family platform implementation removed;
>  - addressed v1 comments (I'm really sorry if I missed something,
>there are a lot of them, and I am really thankfull for detailed review);
> 
> Tested with LTP. Regressions are like this:
> abort01FAIL   2
> kill11 FAIL   2
> mmap16 FAIL   6
> open12 FAIL   2
> rename11   FAIL   2
> rmdir02FAIL   2
> umount2_01 FAIL   2
> umount2_02 FAIL   2
> umount2_03 FAIL   2
> utime06FAIL   2
> 
> It's better than v1, and there are ~5 additional regressions comparing to
> aarch64, all are related to filesystem.
> 
> Andrew Pinski (17):
>   [AARCH64] define word size for lp64 and ilp32
>   [AARCH64] Add header guards to sysdep.h headers.
>   Add dynamic ILP32 AARCH64 relocations to elf.h
>   [AARCH64] Add PTR_REG, PTR_LOG_SIZE, and PTR_SIZE.  Use it in
> LDST_PCREL and LDST_GLOBAL.
>   [AARCH64] Use PTR_REG in crti.S.
>   [AARCH64] Use PTR_REG/PTR_SIZE/PTR_SIZE_LOG in dl-tlsesc.S
>   [AARCH64] Use PTR_* macros in dl-trampoline.S
>   [AARCH64] Use PTR_* in start.S
>   [AARCH64] Use PTR_REG in getcontext.S.
>   [AARCH64] Detect ILP32 in configure scripts.
>   [AARCH64] Add ILP32 support to elf_machine_load_address.
>   [AARCH64] Add ILP32 to makefiles
>   [AARCH64] Add support to ldconfig for ILP32 and libilp32
>   [AARCH64] Add ILP32 ld.so to the known interpreter names.
>   [AARCH64] Add ldd-rewrite.sed so that ilp32 ld.so can be found
>   [AARCH64] Make lp64 and ilp32 directories.
>   [AARCH64] Fix ILP32 warning
> 
> Yury Norov (6):
>   [AARCH64] ILP32: introduce syscalls that pass off_t
>   [AARCH64] ILP32: support stat syscall family
>   [AARCH64] delouse input arguments in system functions
>   [AARCH64] Make __SIZEOF_SEM_T 16 for ILP32
>   off_t: fix register pair calculation for 64-bit case
>   [AARCH64] Take utmp{,x}.h from s390 port
> 
>  elf/cache.c|   3 +
>  sysdeps/aarch64/Implies|   6 -
>  sysdeps/aarch64/__longjmp.S|   6 +-
>  sysdeps/aarch64/bits/wordsize.h|  25 +++
>  sysdeps/aarch64/configure  |  15 +-
>  sysdeps/aarch64/configure.ac   |  11 +-
>  sysdeps/aarch64/crti.S |   3 +-
>  sysdeps/aarch64/dl-irel.h  |   3 +-
>  sysdeps/aarch64/dl-machine.h   | 199 
> -
>  sysdeps/aarch64/dl-tlsdesc.S   |  56 +++---
>  sysdeps/aarch64/dl-trampoline.S|  18 +-
>  sysdeps/aarch64/ilp32/Implies  |   6 +
>  sysdeps/aarch64/jmpbuf-unwind.h|   2 +-
>  sysdeps/aarch64/lp64/Implies   |   7 +
>  sysdeps/aarch64/memcmp.S   |   3 +
>  sysdeps/aarch64/memcpy.S   |   8 +
>  sysdeps/aarch64/memset.S   |   3 +
>  sysdeps/aarch64/nptl/bits/semaphore.h  |   4 +
>  sysdeps/aarch64/preconfigure   |  11 +-
>  sysdeps/aarch64/setjmp.S   |   5 +-
>  sysdeps/aarch64/start.S|  20 ++-
>  sysdeps/aarch64/strchr.S   |   1 +
>  sysdeps/aarch64/strchrnul.S|   1 +
>  sysdeps/aarch64/strcmp.S   |   2 +
>  sysdeps/aarch64/strcpy.S   |   2 +
>  sysdeps/aarch64/strlen.S   

Re: [RFC2 PATCH 00/23] ARM64: support ILP32

2016-06-28 Thread Adhemerval Zanella
Hi Yury,

Please address all previous reviews comment and concerns before send
a new RFC/patch set.  As Joseph pointed out, he raised different question
in different patch iterations (some even back from 2014) and there is
no point is just resent the same patch without first review such
comments.

On 28/06/2016 13:39, Yury Norov wrote:
> This series enables aarch64 port with ilp32 mode.
> 
> ABI details:
>  - types are taken from AARCH32, next types turned to 64-bit,
>as modern requirement for new APIs tells:
>   ino_t  is  u64 type
>   off_t  is  s64 type
>   blkcnt_t   is  s64 type
>   fsblkcnt_t is  u64 type
>   fsfilcnt_t is  u64 type
>  - 64-bit arguments are passed in syscall as register pair,
>as kernel internally clears top halves for all input regs;
>  - standard syscall table is used;
>  - 32-bit time_t is used. AARCH64/ILP32 is waiting for general
>fix of Y2038 problem just like other 32-bit arches;
>  - stat{64}, statfs{64} structures are of the identical layout
>with LP64. Corresponding syscalls are taken from 64-bit code
>  - struct utmp, utmpx layouts are the same.
> 
> v1: https://sourceware.org/ml/libc-alpha/2016-06/msg00730.html
> v2:
>  - rebased on master;
>  - dropped unneeded/unrelated patches;
>  - pread family platform implementation removed;
>  - addressed v1 comments (I'm really sorry if I missed something,
>there are a lot of them, and I am really thankfull for detailed review);
> 
> Tested with LTP. Regressions are like this:
> abort01FAIL   2
> kill11 FAIL   2
> mmap16 FAIL   6
> open12 FAIL   2
> rename11   FAIL   2
> rmdir02FAIL   2
> umount2_01 FAIL   2
> umount2_02 FAIL   2
> umount2_03 FAIL   2
> utime06FAIL   2
> 
> It's better than v1, and there are ~5 additional regressions comparing to
> aarch64, all are related to filesystem.
> 
> Andrew Pinski (17):
>   [AARCH64] define word size for lp64 and ilp32
>   [AARCH64] Add header guards to sysdep.h headers.
>   Add dynamic ILP32 AARCH64 relocations to elf.h
>   [AARCH64] Add PTR_REG, PTR_LOG_SIZE, and PTR_SIZE.  Use it in
> LDST_PCREL and LDST_GLOBAL.
>   [AARCH64] Use PTR_REG in crti.S.
>   [AARCH64] Use PTR_REG/PTR_SIZE/PTR_SIZE_LOG in dl-tlsesc.S
>   [AARCH64] Use PTR_* macros in dl-trampoline.S
>   [AARCH64] Use PTR_* in start.S
>   [AARCH64] Use PTR_REG in getcontext.S.
>   [AARCH64] Detect ILP32 in configure scripts.
>   [AARCH64] Add ILP32 support to elf_machine_load_address.
>   [AARCH64] Add ILP32 to makefiles
>   [AARCH64] Add support to ldconfig for ILP32 and libilp32
>   [AARCH64] Add ILP32 ld.so to the known interpreter names.
>   [AARCH64] Add ldd-rewrite.sed so that ilp32 ld.so can be found
>   [AARCH64] Make lp64 and ilp32 directories.
>   [AARCH64] Fix ILP32 warning
> 
> Yury Norov (6):
>   [AARCH64] ILP32: introduce syscalls that pass off_t
>   [AARCH64] ILP32: support stat syscall family
>   [AARCH64] delouse input arguments in system functions
>   [AARCH64] Make __SIZEOF_SEM_T 16 for ILP32
>   off_t: fix register pair calculation for 64-bit case
>   [AARCH64] Take utmp{,x}.h from s390 port
> 
>  elf/cache.c|   3 +
>  sysdeps/aarch64/Implies|   6 -
>  sysdeps/aarch64/__longjmp.S|   6 +-
>  sysdeps/aarch64/bits/wordsize.h|  25 +++
>  sysdeps/aarch64/configure  |  15 +-
>  sysdeps/aarch64/configure.ac   |  11 +-
>  sysdeps/aarch64/crti.S |   3 +-
>  sysdeps/aarch64/dl-irel.h  |   3 +-
>  sysdeps/aarch64/dl-machine.h   | 199 
> -
>  sysdeps/aarch64/dl-tlsdesc.S   |  56 +++---
>  sysdeps/aarch64/dl-trampoline.S|  18 +-
>  sysdeps/aarch64/ilp32/Implies  |   6 +
>  sysdeps/aarch64/jmpbuf-unwind.h|   2 +-
>  sysdeps/aarch64/lp64/Implies   |   7 +
>  sysdeps/aarch64/memcmp.S   |   3 +
>  sysdeps/aarch64/memcpy.S   |   8 +
>  sysdeps/aarch64/memset.S   |   3 +
>  sysdeps/aarch64/nptl/bits/semaphore.h  |   4 +
>  sysdeps/aarch64/preconfigure   |  11 +-
>  sysdeps/aarch64/setjmp.S   |   5 +-
>  sysdeps/aarch64/start.S|  20 ++-
>  sysdeps/aarch64/strchr.S   |   1 +
>  sysdeps/aarch64/strchrnul.S|   1 +
>  sysdeps/aarch64/strcmp.S   |   2 +
>  sysdeps/aarch64/strcpy.S   |   2 +
>  sysdeps/aarch64/strlen.S