Re: [apparmor] [PATCH 0/2] apparmor unaligned memory fixes

John Paul Adrian Glaubitz Sat, 29 Nov 2025 00:12:17 -0800

Hi Helge,

On Wed, 2025-11-26 at 21:15 +0100, Helge Deller wrote:
> * John Johansen <[email protected]>:
> > On 11/26/25 07:12, Helge Deller wrote:
> > > * david laight <[email protected]>:
> > > > On Wed, 26 Nov 2025 12:03:03 +0100
> > > > Helge Deller <[email protected]> wrote:
> > > > 
> > > > > On 11/26/25 11:44, david laight wrote:
> > > > ...
> > > > > > > diff --git a/security/apparmor/match.c b/security/apparmor/match.c
> > > > > > > index 26e82ba879d44..3dcc342337aca 100644
> > > > > > > --- a/security/apparmor/match.c
> > > > > > > +++ b/security/apparmor/match.c
> > > > > > > @@ -71,10 +71,10 @@ static struct table_header *unpack_table(char 
> > > > > > > *blob, size_t bsize)
> > > > > > >                                        u8, u8, byte_to_byte);
> > > > > > 
> > > > > > Is that that just memcpy() ?
> > > > > 
> > > > > No, it's memcpy() only on big-endian machines.
> > > > 
> > > > You've misread the quoting...
> > > > The 'data8' case that was only half there is a memcpy().
> > > > 
> > > > > On little-endian machines it converts from big-endian
> > > > > 16/32-bit ints to little-endian 16/32-bit ints.
> > > > > 
> > > > > But I see some potential for optimization here:
> > > > > a) on big-endian machines just use memcpy()
> > > > 
> > > > true
> > > > 
> > > > > b) on little-endian machines use memcpy() to copy from 
> > > > > possibly-unaligned
> > > > >      memory to then known-to-be-aligned destination. Then use a loop 
> > > > > with
> > > > >      be32_to_cpu() instead of get_unaligned_xx() as it's faster.
> > > > 
> > > > There is a function that does a loop byteswap of a buffer - no reason
> > > > to re-invent it.
> > > 
> > > I assumed there must be something, but I did not see it. Which one?
> > > 
> > > > But I doubt it is always (if ever) faster to do a copy and then 
> > > > byteswap.
> > > > The loop control and extra memory accesses kill performance.
> > > 
> > > Yes, you are probably right.
> > > 
> > > > Not that I've seen a fast get_unaligned() - I don't think gcc or clang
> > > > generate optimal code - For LE I think it is something like:
> > > >         low = *(addr & ~3);
> > > >         high = *((addr + 3) & ~3);
> > > >         shift = (addr & 3) * 8;
> > > >         value = low << shift | high >> (32 - shift);
> > > > Note that it is only 2 aligned memory reads - even for 64bit.
> > > 
> > > Ok, then maybe we should keep it simple like this patch:
> > > 
> > > [PATCH v2] apparmor: Optimize table creation from possibly unaligned 
> > > memory
> > > 
> > > Source blob may come from userspace and might be unaligned.
> > > Try to optize the copying process by avoiding unaligned memory accesses.
> > > 
> > > Signed-off-by: Helge Deller <[email protected]>
> > > 
> > > diff --git a/security/apparmor/include/match.h 
> > > b/security/apparmor/include/match.h
> > > index 1fbe82f5021b..386da2023d50 100644
> > > --- a/security/apparmor/include/match.h
> > > +++ b/security/apparmor/include/match.h
> > > @@ -104,16 +104,20 @@ struct aa_dfa {
> > >           struct table_header *tables[YYTD_ID_TSIZE];
> > >   };
> > > -#define byte_to_byte(X) (X)
> > > +#define byte_to_byte(X) (*(X))
> > >   #define UNPACK_ARRAY(TABLE, BLOB, LEN, TTYPE, BTYPE, NTOHX)     \
> > >           do { \
> > >                   typeof(LEN) __i; \
> > >                   TTYPE *__t = (TTYPE *) TABLE; \
> > >                   BTYPE *__b = (BTYPE *) BLOB; \
> > > -         for (__i = 0; __i < LEN; __i++) { \
> > > -                 __t[__i] = NTOHX(__b[__i]); \
> > > -         } \
> > > +         BUILD_BUG_ON(sizeof(TTYPE) != sizeof(BTYPE)); \
> > > +         if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) || sizeof(BTYPE) == 1) \
> > > +                 memcpy(__t, __b, (LEN) * sizeof(BTYPE)); \
> > > +         else /* copy & convert convert from big-endian */ \
> > > +                 for (__i = 0; __i < LEN; __i++) { \
> > > +                         __t[__i] = NTOHX(&__b[__i]); \
> > > +                 } \
> > >           } while (0)
> > >   static inline size_t table_size(size_t len, size_t el_size)
> > > diff --git a/security/apparmor/match.c b/security/apparmor/match.c
> > > index c5a91600842a..13e2f6873329 100644
> > > --- a/security/apparmor/match.c
> > > +++ b/security/apparmor/match.c
> > > @@ -15,6 +15,7 @@
> > >   #include <linux/vmalloc.h>
> > >   #include <linux/err.h>
> > >   #include <linux/kref.h>
> > > +#include <linux/unaligned.h>
> > >   #include "include/lib.h"
> > >   #include "include/match.h"
> > > @@ -70,10 +71,10 @@ static struct table_header *unpack_table(char *blob, 
> > > size_t bsize)
> > >                                        u8, u8, byte_to_byte);
> > >                   else if (th.td_flags == YYTD_DATA16)
> > >                           UNPACK_ARRAY(table->td_data, blob, th.td_lolen,
> > > -                              u16, __be16, be16_to_cpu);
> > > +                              u16, __be16, get_unaligned_be16);
> > >                   else if (th.td_flags == YYTD_DATA32)
> > >                           UNPACK_ARRAY(table->td_data, blob, th.td_lolen,
> > > -                              u32, __be32, be32_to_cpu);
> > > +                              u32, __be32, get_unaligned_be32);
> > >                   else
> > >                           goto fail;
> > >                   /* if table was vmalloced make sure the page tables are 
> > > synced
> > 
> > I think we can make one more tweak, in just not using UNPACK_ARRAY at all 
> > for the byte case
> > ie.
> > 
> > diff --git a/security/apparmor/match.c b/security/apparmor/match.c
> > index 26e82ba879d44..389202560675c 100644
> > --- a/security/apparmor/match.c
> > +++ b/security/apparmor/match.c
> > @@ -67,8 +67,7 @@ static struct table_header *unpack_table(char *blob, 
> > size_t bsize)
> >             table->td_flags = th.td_flags;
> >             table->td_lolen = th.td_lolen;
> >             if (th.td_flags == YYTD_DATA8)
> > -                   UNPACK_ARRAY(table->td_data, blob, th.td_lolen,
> > -                                u8, u8, byte_to_byte);
> > +                   memcp(table->td_data, blob, th.td_lolen);
> 
> True.
> Then byte_to_byte() can go away in match.h as well.
> So, here is a (untested) v3:
> 
> 
> [PATCH v3] apparmor: Optimize table creation from possibly unaligned memory
> 
> Source blob may come from userspace and might be unaligned.
> Try to optize the copying process by avoiding unaligned memory accesses.
> 
> Signed-off-by: Helge Deller <[email protected]>
> 
> diff --git a/security/apparmor/include/match.h 
> b/security/apparmor/include/match.h
> index 1fbe82f5021b..19e72b3e8f49 100644
> --- a/security/apparmor/include/match.h
> +++ b/security/apparmor/include/match.h
> @@ -104,16 +104,18 @@ struct aa_dfa {
>       struct table_header *tables[YYTD_ID_TSIZE];
>  };
>  
> -#define byte_to_byte(X) (X)
> -
>  #define UNPACK_ARRAY(TABLE, BLOB, LEN, TTYPE, BTYPE, NTOHX)  \
>       do { \
>               typeof(LEN) __i; \
>               TTYPE *__t = (TTYPE *) TABLE; \
>               BTYPE *__b = (BTYPE *) BLOB; \
> -             for (__i = 0; __i < LEN; __i++) { \
> -                     __t[__i] = NTOHX(__b[__i]); \
> -             } \
> +             BUILD_BUG_ON(sizeof(TTYPE) != sizeof(BTYPE)); \
> +             if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) \
> +                     memcpy(__t, __b, (LEN) * sizeof(BTYPE)); \
> +             else /* copy & convert convert from big-endian */ \
> +                     for (__i = 0; __i < LEN; __i++) { \
> +                             __t[__i] = NTOHX(&__b[__i]); \
> +                     } \
>       } while (0)
>  
>  static inline size_t table_size(size_t len, size_t el_size)
> diff --git a/security/apparmor/match.c b/security/apparmor/match.c
> index c5a91600842a..1e32c8ba14ae 100644
> --- a/security/apparmor/match.c
> +++ b/security/apparmor/match.c
> @@ -15,6 +15,7 @@
>  #include <linux/vmalloc.h>
>  #include <linux/err.h>
>  #include <linux/kref.h>
> +#include <linux/unaligned.h>
>  
>  #include "include/lib.h"
>  #include "include/match.h"
> @@ -66,14 +67,13 @@ static struct table_header *unpack_table(char *blob, 
> size_t bsize)
>               table->td_flags = th.td_flags;
>               table->td_lolen = th.td_lolen;
>               if (th.td_flags == YYTD_DATA8)
> -                     UNPACK_ARRAY(table->td_data, blob, th.td_lolen,
> -                                  u8, u8, byte_to_byte);
> +                     memcpy(table->td_data, blob, th.td_lolen);
>               else if (th.td_flags == YYTD_DATA16)
>                       UNPACK_ARRAY(table->td_data, blob, th.td_lolen,
> -                                  u16, __be16, be16_to_cpu);
> +                                  u16, __be16, get_unaligned_be16);
>               else if (th.td_flags == YYTD_DATA32)
>                       UNPACK_ARRAY(table->td_data, blob, th.td_lolen,
> -                                  u32, __be32, be32_to_cpu);
> +                                  u32, __be32, get_unaligned_be32);
>               else
>                       goto fail;
>               /* if table was vmalloced make sure the page tables are synced


I have applied both patches, the latter required minimal rework, and I confirm 
the issue is gone.

Could you post a cleaned up series with both patches so I can add my Tested-by?

Thanks,
Adrian

-- 
 .''`.  John Paul Adrian Glaubitz
: :' :  Debian Developer
`. `'   Physicist
  `-    GPG: 62FF 8A75 84E0 2956 9546  0006 7426 3B37 F5B5 F913

Re: [apparmor] [PATCH 0/2] apparmor unaligned memory fixes

Reply via email to