Hi Paul,

> Also, it's better to keep code simple when possible. I came up with a 
> simpler fix for the -fstrict-align bug that generates identical aligned 
> code for GCC 11 (didn't bother going back to GCC 10) and installed the 
> attached set of patches to do that.

I vehemently disagree with this simplification.

I just spent a whole day creating working, optimized *_aligned_* functions.
(The purpose of these functions is to be optimized, otherwise the
load8_* / store8_* functions without _aligned_ could be used.)
And your patch undoes my optimizations.

No, your simplified code does *NOT* generate the same code. If you got
that impression, you have probably tested only x86_64, x86, and arm64.

Try testing with some new arch (riscv64) or some older arch (alpha or sparc64).
I attach the code of the *_aligned_* functions, from my stdbit.in.h and from
yours.
  $ riscv64-linux-gnu-gcc -O2 -S -fomit-frame-pointer loadstore8-bruno.c
  $ riscv64-linux-gnu-gcc -O2 -S -fomit-frame-pointer loadstore8-paul.c

Take the stdc_load8_aligned_leu32 function as an example.
In loadstore8-bruno.s:

stdc_load8_aligned_leu32:
        lw      a0,0(a0)
        ret

In loadstore8-paul.s:

stdc_load8_aligned_leu32:
.LFB14:
        lbu     a4,1(a0)
        lbu     a3,0(a0)
        lbu     a5,2(a0)
        lbu     a0,3(a0)
        slli    a4,a4,8
        or      a4,a4,a3
        slli    a5,a5,16
        or      a5,a5,a4
        slli    a0,a0,24
        or      a0,a0,a5
        sext.w  a0,a0
        ret

You don't need to benchmark these in order to see which is faster.
It is obvious: 2 instructions vs. 12 instructions.

Please revert this major de-optimization.

> These patches also remove casts that 
> aren't needed (some of which confused me a bit).

The casts to signed intN_t types were there for clarity.

The casts to uint_fast16_t were there for speed. On some architectures,
such as sparc, it is more efficient to work with 32-bit integers than with
16-bit integers, and the definition of uint_fast16_t as uint32_t embodies
this knowledge. Removing these casts is also a de-optimization that no one
has asked for.

Bruno
#include <stdint.h>
#include <string.h>
#include <byteswap.h>

/* The aligned functions below test WORDS_BIGENDIAN, which is not defined
   anywhere in this file.  When nothing (such as a configure-generated
   header) has defined it, fall back on the compiler's predefined
   byte-order macros, so that a big-endian target does not silently take
   the little-endian code path.  */
#if (!defined WORDS_BIGENDIAN \
     && defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__ \
     && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
# define WORDS_BIGENDIAN 1
#endif

/* Choose how the *_aligned_* functions access memory.  Exactly one of the
   following macros gets defined to 1:
     _GL_LOADSTORE8_VARIANT_A  clang >= 4, GCC >= 4.7 in C, GCC >= 4.9 in C++:
                               memcpy through __builtin_assume_aligned.
     _GL_LOADSTORE8_VARIANT_E  MSVC: plain memcpy.
     _GL_LOADSTORE8_VARIANT_F  anything else: byte-by-byte shifts.  */
#if (defined __clang__ ? __clang_major__ >= 4 : \
     (defined __GNUC__ \
      && (defined __cplusplus \
          ? __GNUC__ + (__GNUC_MINOR__ >= 9) > 4 \
          : __GNUC__ + (__GNUC_MINOR__ >= 7) > 4)))
# define _GL_LOADSTORE8_VARIANT_A 1
#elif defined _MSC_VER
# define _GL_LOADSTORE8_VARIANT_E 1
#else
# define _GL_LOADSTORE8_VARIANT_F 1
#endif

/* Return the single byte stored at PTR.  With only one byte involved,
   neither alignment nor byte order plays a role.  */
uint_least8_t
stdc_load8_aligned_beu8 (const unsigned char ptr[1])
{
  uint_least8_t byte = *ptr;
  return byte;
}

/* Return the 16-bit big-endian value stored in the 2 bytes at PTR.
   The VARIANT_A path tells the compiler that PTR is 2-byte aligned.  */
uint_least16_t
stdc_load8_aligned_beu16 (const unsigned char ptr[2])
{
# if _GL_LOADSTORE8_VARIANT_F
  /* Portable path: assemble the value from its bytes.  */
  uint_fast16_t msb = ptr[0];
  uint_fast16_t lsb = ptr[1];
  return (msb << 8) | lsb;
# else
  /* Fetch the 2 bytes in memory order, then fix up the byte order.  */
  uint16_t raw;
#  if _GL_LOADSTORE8_VARIANT_A
  const void *src = __builtin_assume_aligned (ptr, 2);
#  else /* _GL_LOADSTORE8_VARIANT_E */
  const void *src = ptr;
#  endif
  memcpy (&raw, src, sizeof raw);
#  ifdef WORDS_BIGENDIAN
  return raw;
#  else
  return bswap_16 (raw);
#  endif
# endif
}

/* Return the 32-bit big-endian value stored in the 4 bytes at PTR.
   The VARIANT_A path tells the compiler that PTR is 4-byte aligned.  */
uint_least32_t
stdc_load8_aligned_beu32 (const unsigned char ptr[4])
{
# if _GL_LOADSTORE8_VARIANT_F
  /* Portable path: assemble the value from its bytes.  */
  uint_fast32_t b0 = ptr[0], b1 = ptr[1], b2 = ptr[2], b3 = ptr[3];
  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
# else
  /* Fetch the 4 bytes in memory order, then fix up the byte order.  */
  uint32_t raw;
#  if _GL_LOADSTORE8_VARIANT_A
  const void *src = __builtin_assume_aligned (ptr, 4);
#  else /* _GL_LOADSTORE8_VARIANT_E */
  const void *src = ptr;
#  endif
  memcpy (&raw, src, sizeof raw);
#  ifdef WORDS_BIGENDIAN
  return raw;
#  else
  return bswap_32 (raw);
#  endif
# endif
}

/* Return the 64-bit big-endian value stored in the 8 bytes at PTR.
   The VARIANT_A path tells the compiler that PTR is 8-byte aligned.  */
uint_least64_t
stdc_load8_aligned_beu64 (const unsigned char ptr[8])
{
# if _GL_LOADSTORE8_VARIANT_F
  /* Portable path: build the upper and lower 32 bits, then join them.  */
  uint_fast64_t upper = ((uint_fast64_t) ptr[0] << 24)
                        | ((uint_fast64_t) ptr[1] << 16)
                        | ((uint_fast64_t) ptr[2] << 8)
                        | (uint_fast64_t) ptr[3];
  uint_fast64_t lower = ((uint_fast64_t) ptr[4] << 24)
                        | ((uint_fast64_t) ptr[5] << 16)
                        | ((uint_fast64_t) ptr[6] << 8)
                        | (uint_fast64_t) ptr[7];
  return (upper << 32) | lower;
# else
  /* Fetch the 8 bytes in memory order, then fix up the byte order.  */
  uint64_t raw;
#  if _GL_LOADSTORE8_VARIANT_A
  const void *src = __builtin_assume_aligned (ptr, 8);
#  else /* _GL_LOADSTORE8_VARIANT_E */
  const void *src = ptr;
#  endif
  memcpy (&raw, src, sizeof raw);
#  ifdef WORDS_BIGENDIAN
  return raw;
#  else
  return bswap_64 (raw);
#  endif
# endif
}

/* Return the single byte stored at PTR.  With only one byte involved,
   neither alignment nor byte order plays a role.  */
uint_least8_t
stdc_load8_aligned_leu8 (const unsigned char ptr[1])
{
  uint_least8_t byte = *ptr;
  return byte;
}

/* Return the 16-bit little-endian value stored in the 2 bytes at PTR.
   The VARIANT_A path tells the compiler that PTR is 2-byte aligned.  */
uint_least16_t
stdc_load8_aligned_leu16 (const unsigned char ptr[2])
{
# if _GL_LOADSTORE8_VARIANT_F
  /* Portable path: assemble the value from its bytes.  */
  uint_fast16_t lsb = ptr[0];
  uint_fast16_t msb = ptr[1];
  return lsb | (msb << 8);
# else
  /* Fetch the 2 bytes in memory order, then fix up the byte order.  */
  uint16_t raw;
#  if _GL_LOADSTORE8_VARIANT_A
  const void *src = __builtin_assume_aligned (ptr, 2);
#  else /* _GL_LOADSTORE8_VARIANT_E */
  const void *src = ptr;
#  endif
  memcpy (&raw, src, sizeof raw);
#  ifdef WORDS_BIGENDIAN
  return bswap_16 (raw);
#  else
  return raw;
#  endif
# endif
}

/* Return the 32-bit little-endian value stored in the 4 bytes at PTR.
   The VARIANT_A path tells the compiler that PTR is 4-byte aligned.  */
uint_least32_t
stdc_load8_aligned_leu32 (const unsigned char ptr[4])
{
# if _GL_LOADSTORE8_VARIANT_F
  /* Portable path: assemble the value from its bytes.  */
  uint_fast32_t b0 = ptr[0], b1 = ptr[1], b2 = ptr[2], b3 = ptr[3];
  return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
# else
  /* Fetch the 4 bytes in memory order, then fix up the byte order.  */
  uint32_t raw;
#  if _GL_LOADSTORE8_VARIANT_A
  const void *src = __builtin_assume_aligned (ptr, 4);
#  else /* _GL_LOADSTORE8_VARIANT_E */
  const void *src = ptr;
#  endif
  memcpy (&raw, src, sizeof raw);
#  ifdef WORDS_BIGENDIAN
  return bswap_32 (raw);
#  else
  return raw;
#  endif
# endif
}

/* Return the 64-bit little-endian value stored in the 8 bytes at PTR.
   The VARIANT_A path tells the compiler that PTR is 8-byte aligned.  */
uint_least64_t
stdc_load8_aligned_leu64 (const unsigned char ptr[8])
{
# if _GL_LOADSTORE8_VARIANT_F
  /* Portable path: build the lower and upper 32 bits, then join them.  */
  uint_fast64_t lower = (uint_fast64_t) ptr[0]
                        | ((uint_fast64_t) ptr[1] << 8)
                        | ((uint_fast64_t) ptr[2] << 16)
                        | ((uint_fast64_t) ptr[3] << 24);
  uint_fast64_t upper = (uint_fast64_t) ptr[4]
                        | ((uint_fast64_t) ptr[5] << 8)
                        | ((uint_fast64_t) ptr[6] << 16)
                        | ((uint_fast64_t) ptr[7] << 24);
  return lower | (upper << 32);
# else
  /* Fetch the 8 bytes in memory order, then fix up the byte order.  */
  uint64_t raw;
#  if _GL_LOADSTORE8_VARIANT_A
  const void *src = __builtin_assume_aligned (ptr, 8);
#  else /* _GL_LOADSTORE8_VARIANT_E */
  const void *src = ptr;
#  endif
  memcpy (&raw, src, sizeof raw);
#  ifdef WORDS_BIGENDIAN
  return bswap_64 (raw);
#  else
  return raw;
#  endif
# endif
}

/* Return the single byte stored at PTR.  */
static inline uint_least8_t
stdc_load8_beu8 (const unsigned char ptr[1])
{
  uint_least8_t byte = *ptr;
  return byte;
}

/* Return the 16-bit big-endian value stored in the 2 bytes at PTR,
   reading one byte at a time.  */
static inline uint_least16_t
stdc_load8_beu16 (const unsigned char ptr[2])
{
  uint_fast16_t msb = ptr[0];
  uint_fast16_t lsb = ptr[1];
  return (msb << 8) | lsb;
}

/* Return the 32-bit big-endian value stored in the 4 bytes at PTR,
   reading one byte at a time.  */
static inline uint_least32_t
stdc_load8_beu32 (const unsigned char ptr[4])
{
  uint_fast32_t b0 = ptr[0], b1 = ptr[1], b2 = ptr[2], b3 = ptr[3];
  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}

/* Return the 64-bit big-endian value stored in the 8 bytes at PTR,
   reading one byte at a time.  */
static inline uint_least64_t
stdc_load8_beu64 (const unsigned char ptr[8])
{
  /* Bytes 0..3 form the upper 32 bits, bytes 4..7 the lower 32 bits.  */
  uint_fast64_t upper = ((uint_fast64_t) ptr[0] << 24)
                        | ((uint_fast64_t) ptr[1] << 16)
                        | ((uint_fast64_t) ptr[2] << 8)
                        | (uint_fast64_t) ptr[3];
  uint_fast64_t lower = ((uint_fast64_t) ptr[4] << 24)
                        | ((uint_fast64_t) ptr[5] << 16)
                        | ((uint_fast64_t) ptr[6] << 8)
                        | (uint_fast64_t) ptr[7];
  return (upper << 32) | lower;
}

/* Return the single byte stored at PTR.  */
static inline uint_least8_t
stdc_load8_leu8 (const unsigned char ptr[1])
{
  uint_least8_t byte = *ptr;
  return byte;
}

/* Return the 16-bit little-endian value stored in the 2 bytes at PTR,
   reading one byte at a time.  */
static inline uint_least16_t
stdc_load8_leu16 (const unsigned char ptr[2])
{
  uint_fast16_t lsb = ptr[0];
  uint_fast16_t msb = ptr[1];
  return lsb | (msb << 8);
}

/* Return the 32-bit little-endian value stored in the 4 bytes at PTR,
   reading one byte at a time.  */
static inline uint_least32_t
stdc_load8_leu32 (const unsigned char ptr[4])
{
  uint_fast32_t b0 = ptr[0], b1 = ptr[1], b2 = ptr[2], b3 = ptr[3];
  return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
}

/* Return the 64-bit little-endian value stored in the 8 bytes at PTR,
   reading one byte at a time.  */
static inline uint_least64_t
stdc_load8_leu64 (const unsigned char ptr[8])
{
  /* Bytes 0..3 form the lower 32 bits, bytes 4..7 the upper 32 bits.  */
  uint_fast64_t lower = (uint_fast64_t) ptr[0]
                        | ((uint_fast64_t) ptr[1] << 8)
                        | ((uint_fast64_t) ptr[2] << 16)
                        | ((uint_fast64_t) ptr[3] << 24);
  uint_fast64_t upper = (uint_fast64_t) ptr[4]
                        | ((uint_fast64_t) ptr[5] << 8)
                        | ((uint_fast64_t) ptr[6] << 16)
                        | ((uint_fast64_t) ptr[7] << 24);
  return lower | (upper << 32);
}

/* Store VALUE into the single byte at PTR.  With only one byte involved,
   neither alignment nor byte order plays a role.  */
void
stdc_store8_aligned_beu8 (uint_least8_t value, unsigned char ptr[1])
{
  *ptr = value;
}

/* Store the low 16 bits of VALUE, in big-endian order, into the 2 bytes
   at PTR.  The VARIANT_A path tells the compiler that PTR is 2-byte
   aligned.  */
void
stdc_store8_aligned_beu16 (uint_least16_t value, unsigned char ptr[2])
{
# if _GL_LOADSTORE8_VARIANT_F
  /* Portable path: write one byte at a time, most significant first.  */
  ptr[0] = (unsigned char) ((value >> 8) & 0xFFU);
  ptr[1] = (unsigned char) (value & 0xFFU);
# else
  /* Put the bytes into memory order first, then copy them out in one go.  */
#  ifdef WORDS_BIGENDIAN
  const uint16_t image = value;
#  else
  const uint16_t image = bswap_16 (value);
#  endif
#  if _GL_LOADSTORE8_VARIANT_A
  void *dst = __builtin_assume_aligned (ptr, 2);
#  else /* _GL_LOADSTORE8_VARIANT_E */
  void *dst = ptr;
#  endif
  memcpy (dst, &image, sizeof image);
# endif
}

/* Store the low 32 bits of VALUE, in big-endian order, into the 4 bytes
   at PTR.  The VARIANT_A path tells the compiler that PTR is 4-byte
   aligned.  */
void
stdc_store8_aligned_beu32 (uint_least32_t value, unsigned char ptr[4])
{
# if _GL_LOADSTORE8_VARIANT_F
  /* Portable path: write one byte at a time, most significant first.  */
  ptr[0] = (unsigned char) ((value >> 24) & 0xFFU);
  ptr[1] = (unsigned char) ((value >> 16) & 0xFFU);
  ptr[2] = (unsigned char) ((value >> 8) & 0xFFU);
  ptr[3] = (unsigned char) (value & 0xFFU);
# else
  /* Put the bytes into memory order first, then copy them out in one go.  */
#  ifdef WORDS_BIGENDIAN
  const uint32_t image = value;
#  else
  const uint32_t image = bswap_32 (value);
#  endif
#  if _GL_LOADSTORE8_VARIANT_A
  void *dst = __builtin_assume_aligned (ptr, 4);
#  else /* _GL_LOADSTORE8_VARIANT_E */
  void *dst = ptr;
#  endif
  memcpy (dst, &image, sizeof image);
# endif
}

/* Store the low 64 bits of VALUE, in big-endian order, into the 8 bytes
   at PTR.  The VARIANT_A path tells the compiler that PTR is 8-byte
   aligned.  */
void
stdc_store8_aligned_beu64 (uint_least64_t value, unsigned char ptr[8])
{
# if _GL_LOADSTORE8_VARIANT_F
  /* Portable path: write one byte at a time, most significant first.  */
  ptr[0] = (unsigned char) ((value >> 56) & 0xFFU);
  ptr[1] = (unsigned char) ((value >> 48) & 0xFFU);
  ptr[2] = (unsigned char) ((value >> 40) & 0xFFU);
  ptr[3] = (unsigned char) ((value >> 32) & 0xFFU);
  ptr[4] = (unsigned char) ((value >> 24) & 0xFFU);
  ptr[5] = (unsigned char) ((value >> 16) & 0xFFU);
  ptr[6] = (unsigned char) ((value >> 8) & 0xFFU);
  ptr[7] = (unsigned char) (value & 0xFFU);
# else
  /* Put the bytes into memory order first, then copy them out in one go.  */
#  ifdef WORDS_BIGENDIAN
  const uint64_t image = value;
#  else
  const uint64_t image = bswap_64 (value);
#  endif
#  if _GL_LOADSTORE8_VARIANT_A
  void *dst = __builtin_assume_aligned (ptr, 8);
#  else /* _GL_LOADSTORE8_VARIANT_E */
  void *dst = ptr;
#  endif
  memcpy (dst, &image, sizeof image);
# endif
}

/* Store VALUE into the single byte at PTR.  With only one byte involved,
   neither alignment nor byte order plays a role.  */
void
stdc_store8_aligned_leu8 (uint_least8_t value, unsigned char ptr[1])
{
  *ptr = value;
}

/* Store the low 16 bits of VALUE, in little-endian order, into the 2 bytes
   at PTR.  The VARIANT_A path tells the compiler that PTR is 2-byte
   aligned.  */
void
stdc_store8_aligned_leu16 (uint_least16_t value, unsigned char ptr[2])
{
# if _GL_LOADSTORE8_VARIANT_F
  /* Portable path: write one byte at a time, least significant first.  */
  ptr[0] = (unsigned char) (value & 0xFFU);
  ptr[1] = (unsigned char) ((value >> 8) & 0xFFU);
# else
  /* Put the bytes into memory order first, then copy them out in one go.  */
#  ifdef WORDS_BIGENDIAN
  const uint16_t image = bswap_16 (value);
#  else
  const uint16_t image = value;
#  endif
#  if _GL_LOADSTORE8_VARIANT_A
  void *dst = __builtin_assume_aligned (ptr, 2);
#  else /* _GL_LOADSTORE8_VARIANT_E */
  void *dst = ptr;
#  endif
  memcpy (dst, &image, sizeof image);
# endif
}

/* Store the low 32 bits of VALUE, in little-endian order, into the 4 bytes
   at PTR.  The VARIANT_A path tells the compiler that PTR is 4-byte
   aligned.  */
void
stdc_store8_aligned_leu32 (uint_least32_t value, unsigned char ptr[4])
{
# if _GL_LOADSTORE8_VARIANT_F
  /* Portable path: write one byte at a time, least significant first.  */
  ptr[0] = (unsigned char) (value & 0xFFU);
  ptr[1] = (unsigned char) ((value >> 8) & 0xFFU);
  ptr[2] = (unsigned char) ((value >> 16) & 0xFFU);
  ptr[3] = (unsigned char) ((value >> 24) & 0xFFU);
# else
  /* Put the bytes into memory order first, then copy them out in one go.  */
#  ifdef WORDS_BIGENDIAN
  const uint32_t image = bswap_32 (value);
#  else
  const uint32_t image = value;
#  endif
#  if _GL_LOADSTORE8_VARIANT_A
  void *dst = __builtin_assume_aligned (ptr, 4);
#  else /* _GL_LOADSTORE8_VARIANT_E */
  void *dst = ptr;
#  endif
  memcpy (dst, &image, sizeof image);
# endif
}

/* Store the low 64 bits of VALUE, in little-endian order, into the 8 bytes
   at PTR.  The VARIANT_A path tells the compiler that PTR is 8-byte
   aligned.  */
void
stdc_store8_aligned_leu64 (uint_least64_t value, unsigned char ptr[8])
{
# if _GL_LOADSTORE8_VARIANT_F
  /* Portable path: write one byte at a time, least significant first.  */
  ptr[0] = (unsigned char) (value & 0xFFU);
  ptr[1] = (unsigned char) ((value >> 8) & 0xFFU);
  ptr[2] = (unsigned char) ((value >> 16) & 0xFFU);
  ptr[3] = (unsigned char) ((value >> 24) & 0xFFU);
  ptr[4] = (unsigned char) ((value >> 32) & 0xFFU);
  ptr[5] = (unsigned char) ((value >> 40) & 0xFFU);
  ptr[6] = (unsigned char) ((value >> 48) & 0xFFU);
  ptr[7] = (unsigned char) ((value >> 56) & 0xFFU);
# else
  /* Put the bytes into memory order first, then copy them out in one go.  */
#  ifdef WORDS_BIGENDIAN
  const uint64_t image = bswap_64 (value);
#  else
  const uint64_t image = value;
#  endif
#  if _GL_LOADSTORE8_VARIANT_A
  void *dst = __builtin_assume_aligned (ptr, 8);
#  else /* _GL_LOADSTORE8_VARIANT_E */
  void *dst = ptr;
#  endif
  memcpy (dst, &image, sizeof image);
# endif
}

/* Store VALUE into the single byte at PTR.  */
static inline void
stdc_store8_beu8 (uint_least8_t value, unsigned char ptr[1])
{
  *ptr = value;
}

/* Store the low 16 bits of VALUE, most significant byte first, into the
   2 bytes at PTR, one byte at a time.  */
static inline void
stdc_store8_beu16 (uint_least16_t value, unsigned char ptr[2])
{
  ptr[0] = (unsigned char) ((value >> 8) & 0xFFU);
  ptr[1] = (unsigned char) (value & 0xFFU);
}

/* Store the low 32 bits of VALUE, most significant byte first, into the
   4 bytes at PTR, one byte at a time.  */
static inline void
stdc_store8_beu32 (uint_least32_t value, unsigned char ptr[4])
{
  ptr[0] = (unsigned char) ((value >> 24) & 0xFFU);
  ptr[1] = (unsigned char) ((value >> 16) & 0xFFU);
  ptr[2] = (unsigned char) ((value >> 8) & 0xFFU);
  ptr[3] = (unsigned char) (value & 0xFFU);
}

/* Store the low 64 bits of VALUE, most significant byte first, into the
   8 bytes at PTR, one byte at a time.  */
static inline void
stdc_store8_beu64 (uint_least64_t value, unsigned char ptr[8])
{
  ptr[0] = (unsigned char) ((value >> 56) & 0xFFU);
  ptr[1] = (unsigned char) ((value >> 48) & 0xFFU);
  ptr[2] = (unsigned char) ((value >> 40) & 0xFFU);
  ptr[3] = (unsigned char) ((value >> 32) & 0xFFU);
  ptr[4] = (unsigned char) ((value >> 24) & 0xFFU);
  ptr[5] = (unsigned char) ((value >> 16) & 0xFFU);
  ptr[6] = (unsigned char) ((value >> 8) & 0xFFU);
  ptr[7] = (unsigned char) (value & 0xFFU);
}

/* Store VALUE into the single byte at PTR.  */
static inline void
stdc_store8_leu8 (uint_least8_t value, unsigned char ptr[1])
{
  *ptr = value;
}

/* Store the low 16 bits of VALUE, least significant byte first, into the
   2 bytes at PTR, one byte at a time.  */
static inline void
stdc_store8_leu16 (uint_least16_t value, unsigned char ptr[2])
{
  ptr[0] = (unsigned char) (value & 0xFFU);
  ptr[1] = (unsigned char) ((value >> 8) & 0xFFU);
}

/* Store the low 32 bits of VALUE, least significant byte first, into the
   4 bytes at PTR, one byte at a time.  */
static inline void
stdc_store8_leu32 (uint_least32_t value, unsigned char ptr[4])
{
  ptr[0] = (unsigned char) (value & 0xFFU);
  ptr[1] = (unsigned char) ((value >> 8) & 0xFFU);
  ptr[2] = (unsigned char) ((value >> 16) & 0xFFU);
  ptr[3] = (unsigned char) ((value >> 24) & 0xFFU);
}

/* Store the low 64 bits of VALUE, least significant byte first, into the
   8 bytes at PTR, one byte at a time.  */
static inline void
stdc_store8_leu64 (uint_least64_t value, unsigned char ptr[8])
{
  ptr[0] = (unsigned char) (value & 0xFFU);
  ptr[1] = (unsigned char) ((value >> 8) & 0xFFU);
  ptr[2] = (unsigned char) ((value >> 16) & 0xFFU);
  ptr[3] = (unsigned char) ((value >> 24) & 0xFFU);
  ptr[4] = (unsigned char) ((value >> 32) & 0xFFU);
  ptr[5] = (unsigned char) ((value >> 40) & 0xFFU);
  ptr[6] = (unsigned char) ((value >> 48) & 0xFFU);
  ptr[7] = (unsigned char) ((value >> 56) & 0xFFU);
}
#include <stdint.h>


/* Return the single byte stored at PTR.  */
static inline uint_least8_t
stdc_load8_beu8 (const unsigned char ptr[1])
{
  uint_least8_t byte = *ptr;
  return byte;
}

/* Return the 16-bit big-endian value stored in the 2 bytes at PTR,
   reading one byte at a time.

   The bytes are converted to uint_fast16_t before shifting, like the
   32-bit and 64-bit loaders convert to uint_fast32_t / uint_fast64_t.
   Without the conversion, ptr[0] is promoted to (signed) int, and
   shifting a byte >= 0x80 left by 8 overflows where int has 16 bits.  */
static inline uint_least16_t
stdc_load8_beu16 (const unsigned char ptr[2])
{
  return ((uint_fast16_t) ptr[0] << 8) | (uint_fast16_t) ptr[1];
}

/* Return the 32-bit big-endian value stored in the 4 bytes at PTR,
   reading one byte at a time.  */
static inline uint_least32_t
stdc_load8_beu32 (const unsigned char ptr[4])
{
  uint_fast32_t b0 = ptr[0], b1 = ptr[1], b2 = ptr[2], b3 = ptr[3];
  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}

/* Return the 64-bit big-endian value stored in the 8 bytes at PTR,
   reading one byte at a time.  */
static inline uint_least64_t
stdc_load8_beu64 (const unsigned char ptr[8])
{
  /* Bytes 0..3 form the upper 32 bits, bytes 4..7 the lower 32 bits.  */
  uint_fast64_t upper = ((uint_fast64_t) ptr[0] << 24)
                        | ((uint_fast64_t) ptr[1] << 16)
                        | ((uint_fast64_t) ptr[2] << 8)
                        | (uint_fast64_t) ptr[3];
  uint_fast64_t lower = ((uint_fast64_t) ptr[4] << 24)
                        | ((uint_fast64_t) ptr[5] << 16)
                        | ((uint_fast64_t) ptr[6] << 8)
                        | (uint_fast64_t) ptr[7];
  return (upper << 32) | lower;
}

/* Return the single byte stored at PTR.  */
static inline uint_least8_t
stdc_load8_leu8 (const unsigned char ptr[1])
{
  uint_least8_t byte = *ptr;
  return byte;
}

/* Return the 16-bit little-endian value stored in the 2 bytes at PTR,
   reading one byte at a time.

   The bytes are converted to uint_fast16_t before shifting, like the
   32-bit and 64-bit loaders convert to uint_fast32_t / uint_fast64_t.
   Without the conversion, ptr[1] is promoted to (signed) int, and
   shifting a byte >= 0x80 left by 8 overflows where int has 16 bits.  */
static inline uint_least16_t
stdc_load8_leu16 (const unsigned char ptr[2])
{
  return (uint_fast16_t) ptr[0] | ((uint_fast16_t) ptr[1] << 8);
}

/* Return the 32-bit little-endian value stored in the 4 bytes at PTR,
   reading one byte at a time.  */
static inline uint_least32_t
stdc_load8_leu32 (const unsigned char ptr[4])
{
  uint_fast32_t b0 = ptr[0], b1 = ptr[1], b2 = ptr[2], b3 = ptr[3];
  return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
}

/* Return the 64-bit little-endian value stored in the 8 bytes at PTR,
   reading one byte at a time.  */
static inline uint_least64_t
stdc_load8_leu64 (const unsigned char ptr[8])
{
  /* Bytes 0..3 form the lower 32 bits, bytes 4..7 the upper 32 bits.  */
  uint_fast64_t lower = (uint_fast64_t) ptr[0]
                        | ((uint_fast64_t) ptr[1] << 8)
                        | ((uint_fast64_t) ptr[2] << 16)
                        | ((uint_fast64_t) ptr[3] << 24);
  uint_fast64_t upper = (uint_fast64_t) ptr[4]
                        | ((uint_fast64_t) ptr[5] << 8)
                        | ((uint_fast64_t) ptr[6] << 16)
                        | ((uint_fast64_t) ptr[7] << 24);
  return lower | (upper << 32);
}

/* Aligned variant of stdc_load8_beu8.  It simply forwards to the
   byte-wise loader.  */
uint_least8_t
stdc_load8_aligned_beu8 (const unsigned char ptr[1])
{
  uint_least8_t result = stdc_load8_beu8 (ptr);
  return result;
}

/* Aligned variant of stdc_load8_beu16.  It simply forwards to the
   byte-wise loader; no use is made here of PTR's alignment.  */
uint_least16_t
stdc_load8_aligned_beu16 (const unsigned char ptr[2])
{
  uint_least16_t result = stdc_load8_beu16 (ptr);
  return result;
}

/* Aligned variant of stdc_load8_beu32.  It simply forwards to the
   byte-wise loader; no use is made here of PTR's alignment.  */
uint_least32_t
stdc_load8_aligned_beu32 (const unsigned char ptr[4])
{
  uint_least32_t result = stdc_load8_beu32 (ptr);
  return result;
}

/* Aligned variant of stdc_load8_beu64.  It simply forwards to the
   byte-wise loader; no use is made here of PTR's alignment.  */
uint_least64_t
stdc_load8_aligned_beu64 (const unsigned char ptr[8])
{
  uint_least64_t result = stdc_load8_beu64 (ptr);
  return result;
}

/* Aligned variant of stdc_load8_leu8.  It simply forwards to the
   byte-wise loader.  */
uint_least8_t
stdc_load8_aligned_leu8 (const unsigned char ptr[1])
{
  uint_least8_t result = stdc_load8_leu8 (ptr);
  return result;
}

/* Aligned variant of stdc_load8_leu16.  It simply forwards to the
   byte-wise loader; no use is made here of PTR's alignment.  */
uint_least16_t
stdc_load8_aligned_leu16 (const unsigned char ptr[2])
{
  uint_least16_t result = stdc_load8_leu16 (ptr);
  return result;
}

/* Aligned variant of stdc_load8_leu32.  It simply forwards to the
   byte-wise loader; no use is made here of PTR's alignment.  */
uint_least32_t
stdc_load8_aligned_leu32 (const unsigned char ptr[4])
{
  uint_least32_t result = stdc_load8_leu32 (ptr);
  return result;
}

/* Aligned variant of stdc_load8_leu64.  It simply forwards to the
   byte-wise loader; no use is made here of PTR's alignment.  */
uint_least64_t
stdc_load8_aligned_leu64 (const unsigned char ptr[8])
{
  uint_least64_t result = stdc_load8_leu64 (ptr);
  return result;
}

/* Store VALUE into the single byte at PTR.  */
static inline void
stdc_store8_beu8 (uint_least8_t value, unsigned char ptr[1])
{
  *ptr = value;
}

/* Store the low 16 bits of VALUE, most significant byte first, into the
   2 bytes at PTR, one byte at a time.  */
static inline void
stdc_store8_beu16 (uint_least16_t value, unsigned char ptr[2])
{
  unsigned char *out = ptr;

  *out++ = (value >> 8) & 0xFFU;
  *out = value & 0xFFU;
}

/* Store the low 32 bits of VALUE, most significant byte first, into the
   4 bytes at PTR, one byte at a time.  */
static inline void
stdc_store8_beu32 (uint_least32_t value, unsigned char ptr[4])
{
  unsigned char *out = ptr;

  *out++ = (value >> 24) & 0xFFU;
  *out++ = (value >> 16) & 0xFFU;
  *out++ = (value >> 8) & 0xFFU;
  *out = value & 0xFFU;
}

/* Store the low 64 bits of VALUE, most significant byte first, into the
   8 bytes at PTR, one byte at a time.  */
static inline void
stdc_store8_beu64 (uint_least64_t value, unsigned char ptr[8])
{
  unsigned char *out = ptr;

  *out++ = (value >> 56) & 0xFFU;
  *out++ = (value >> 48) & 0xFFU;
  *out++ = (value >> 40) & 0xFFU;
  *out++ = (value >> 32) & 0xFFU;
  *out++ = (value >> 24) & 0xFFU;
  *out++ = (value >> 16) & 0xFFU;
  *out++ = (value >> 8) & 0xFFU;
  *out = value & 0xFFU;
}

/* Store VALUE into the single byte at PTR.  */
static inline void
stdc_store8_leu8 (uint_least8_t value, unsigned char ptr[1])
{
  *ptr = value;
}

/* Store the low 16 bits of VALUE, least significant byte first, into the
   2 bytes at PTR, one byte at a time.  */
static inline void
stdc_store8_leu16 (uint_least16_t value, unsigned char ptr[2])
{
  unsigned char *out = ptr;

  *out++ = value & 0xFFU;
  *out = (value >> 8) & 0xFFU;
}

/* Store the low 32 bits of VALUE, least significant byte first, into the
   4 bytes at PTR, one byte at a time.  */
static inline void
stdc_store8_leu32 (uint_least32_t value, unsigned char ptr[4])
{
  unsigned char *out = ptr;

  *out++ = value & 0xFFU;
  *out++ = (value >> 8) & 0xFFU;
  *out++ = (value >> 16) & 0xFFU;
  *out = (value >> 24) & 0xFFU;
}

/* Store the low 64 bits of VALUE, least significant byte first, into the
   8 bytes at PTR, one byte at a time.  */
static inline void
stdc_store8_leu64 (uint_least64_t value, unsigned char ptr[8])
{
  unsigned char *out = ptr;

  *out++ = value & 0xFFU;
  *out++ = (value >> 8) & 0xFFU;
  *out++ = (value >> 16) & 0xFFU;
  *out++ = (value >> 24) & 0xFFU;
  *out++ = (value >> 32) & 0xFFU;
  *out++ = (value >> 40) & 0xFFU;
  *out++ = (value >> 48) & 0xFFU;
  *out = (value >> 56) & 0xFFU;
}

/* Aligned variant of stdc_store8_beu8.  It simply forwards to the
   byte-wise store.  */
void
stdc_store8_aligned_beu8 (uint_least8_t value, unsigned char ptr[1])
{
  unsigned char *dest = ptr;
  stdc_store8_beu8 (value, dest);
}

/* Aligned variant of stdc_store8_beu16.  It simply forwards to the
   byte-wise store; no use is made here of PTR's alignment.  */
void
stdc_store8_aligned_beu16 (uint_least16_t value, unsigned char ptr[2])
{
  unsigned char *dest = ptr;
  stdc_store8_beu16 (value, dest);
}

/* Aligned variant of stdc_store8_beu32.  It simply forwards to the
   byte-wise store; no use is made here of PTR's alignment.  */
void
stdc_store8_aligned_beu32 (uint_least32_t value, unsigned char ptr[4])
{
  unsigned char *dest = ptr;
  stdc_store8_beu32 (value, dest);
}

/* Aligned variant of stdc_store8_beu64.  It simply forwards to the
   byte-wise store; no use is made here of PTR's alignment.  */
void
stdc_store8_aligned_beu64 (uint_least64_t value, unsigned char ptr[8])
{
  unsigned char *dest = ptr;
  stdc_store8_beu64 (value, dest);
}

/* Aligned variant of stdc_store8_leu8.  It simply forwards to the
   byte-wise store.  */
void
stdc_store8_aligned_leu8 (uint_least8_t value, unsigned char ptr[1])
{
  unsigned char *dest = ptr;
  stdc_store8_leu8 (value, dest);
}

/* Aligned variant of stdc_store8_leu16.  It simply forwards to the
   byte-wise store; no use is made here of PTR's alignment.  */
void
stdc_store8_aligned_leu16 (uint_least16_t value, unsigned char ptr[2])
{
  unsigned char *dest = ptr;
  stdc_store8_leu16 (value, dest);
}

/* Aligned variant of stdc_store8_leu32.  It simply forwards to the
   byte-wise store; no use is made here of PTR's alignment.  */
void
stdc_store8_aligned_leu32 (uint_least32_t value, unsigned char ptr[4])
{
  unsigned char *dest = ptr;
  stdc_store8_leu32 (value, dest);
}

/* Aligned variant of stdc_store8_leu64.  It simply forwards to the
   byte-wise store; no use is made here of PTR's alignment.  */
void
stdc_store8_aligned_leu64 (uint_least64_t value, unsigned char ptr[8])
{
  unsigned char *dest = ptr;
  stdc_store8_leu64 (value, dest);
}
        .file   "loadstore8-bruno.c"
        .option pic
        .attribute arch, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0"
        .attribute unaligned_access, 0
        .attribute stack_align, 16
        .text
        .align  1
        # uint_least8_t stdc_load8_aligned_beu8 (const unsigned char ptr[1])
        # In: a0 = ptr.  Out: a0 = the byte at ptr.  Leaf function, no stack use.
        .globl  stdc_load8_aligned_beu8
        .type   stdc_load8_aligned_beu8, @function
stdc_load8_aligned_beu8:
.LFB17:
        .cfi_startproc
        lbu     a0,0(a0)                # a0 = ptr[0], zero-extended
        ret
        .cfi_endproc
.LFE17:
        .size   stdc_load8_aligned_beu8, .-stdc_load8_aligned_beu8
        .align  1
        # uint_least16_t stdc_load8_aligned_beu16 (const unsigned char ptr[2])
        # In: a0 = ptr.  Out: a0 = byte-swapped halfword at ptr.
        # The halfword is fetched with lhu (twice) and swapped with shifts.
        .globl  stdc_load8_aligned_beu16
        .type   stdc_load8_aligned_beu16, @function
stdc_load8_aligned_beu16:
.LFB18:
        .cfi_startproc
        lhu     a5,0(a0)                # a5 = halfword at ptr
        lhu     a4,0(a0)                # a4 = the same halfword, loaded again
        slliw   a0,a5,8                 # low byte moved up to bits 8..15
        srliw   a5,a4,8                 # high byte moved down to bits 0..7
        or      a0,a0,a5                # combine the two halves
        slli    a0,a0,48                # shift pair: keep only the low 16 bits,
        srli    a0,a0,48                # zero-extended
        ret
        .cfi_endproc
.LFE18:
        .size   stdc_load8_aligned_beu16, .-stdc_load8_aligned_beu16
        .globl  __bswapsi2
        .align  1
        # uint_least32_t stdc_load8_aligned_beu32 (const unsigned char ptr[4])
        # In: a0 = ptr.  Out: a0 = byte-swapped word at ptr.
        # Non-leaf: the byte swap is done by a call to __bswapsi2, so ra is
        # saved in a 16-byte stack frame around the call.
        .globl  stdc_load8_aligned_beu32
        .type   stdc_load8_aligned_beu32, @function
stdc_load8_aligned_beu32:
.LFB19:
        .cfi_startproc
        addi    sp,sp,-16               # allocate frame
        .cfi_def_cfa_offset 16
        sd      ra,8(sp)                # save return address across the call
        .cfi_offset 1, -8
        lw      a0,0(a0)                # a0 = word at ptr (single 32-bit load)
        call    __bswapsi2@plt          # a0 = bswap32 (a0)
        ld      ra,8(sp)                # restore return address
        .cfi_restore 1
        sext.w  a0,a0                   # sign-extend the 32-bit result
        addi    sp,sp,16                # release frame
        .cfi_def_cfa_offset 0
        jr      ra
        .cfi_endproc
.LFE19:
        .size   stdc_load8_aligned_beu32, .-stdc_load8_aligned_beu32
        .globl  __bswapdi2
        .align  1
        # uint_least64_t stdc_load8_aligned_beu64 (const unsigned char ptr[8])
        # In: a0 = ptr.  Out: a0 = byte-swapped doubleword at ptr.
        # Non-leaf: the byte swap is done by a call to __bswapdi2, so ra is
        # saved in a 16-byte stack frame around the call.
        .globl  stdc_load8_aligned_beu64
        .type   stdc_load8_aligned_beu64, @function
stdc_load8_aligned_beu64:
.LFB20:
        .cfi_startproc
        addi    sp,sp,-16               # allocate frame
        .cfi_def_cfa_offset 16
        sd      ra,8(sp)                # save return address across the call
        .cfi_offset 1, -8
        ld      a0,0(a0)                # a0 = doubleword at ptr (single 64-bit load)
        call    __bswapdi2@plt          # a0 = bswap64 (a0)
        ld      ra,8(sp)                # restore return address
        .cfi_restore 1
        addi    sp,sp,16                # release frame
        .cfi_def_cfa_offset 0
        jr      ra
        .cfi_endproc
.LFE20:
        .size   stdc_load8_aligned_beu64, .-stdc_load8_aligned_beu64
        .align  1
        # uint_least8_t stdc_load8_aligned_leu8 (const unsigned char ptr[1])
        # In: a0 = ptr.  Out: a0 = the byte at ptr.  Leaf function, no stack use.
        .globl  stdc_load8_aligned_leu8
        .type   stdc_load8_aligned_leu8, @function
stdc_load8_aligned_leu8:
.LFB50:
        .cfi_startproc
        lbu     a0,0(a0)                # a0 = ptr[0], zero-extended
        ret
        .cfi_endproc
.LFE50:
        .size   stdc_load8_aligned_leu8, .-stdc_load8_aligned_leu8
        .align  1
        # uint_least16_t stdc_load8_aligned_leu16 (const unsigned char ptr[2])
        # In: a0 = ptr.  Out: a0 = halfword at ptr.  One load, no byte swap.
        .globl  stdc_load8_aligned_leu16
        .type   stdc_load8_aligned_leu16, @function
stdc_load8_aligned_leu16:
.LFB22:
        .cfi_startproc
        lhu     a0,0(a0)                # a0 = 16-bit value at ptr, zero-extended
        ret
        .cfi_endproc
.LFE22:
        .size   stdc_load8_aligned_leu16, .-stdc_load8_aligned_leu16
        .align  1
        # uint_least32_t stdc_load8_aligned_leu32 (const unsigned char ptr[4])
        # In: a0 = ptr.  Out: a0 = word at ptr.  One load, no byte swap.
        .globl  stdc_load8_aligned_leu32
        .type   stdc_load8_aligned_leu32, @function
stdc_load8_aligned_leu32:
.LFB23:
        .cfi_startproc
        lw      a0,0(a0)                # a0 = 32-bit value at ptr (lw sign-extends)
        ret
        .cfi_endproc
.LFE23:
        .size   stdc_load8_aligned_leu32, .-stdc_load8_aligned_leu32
        .align  1
        # uint_least64_t stdc_load8_aligned_leu64 (const unsigned char ptr[8])
        # In: a0 = ptr.  Out: a0 = doubleword at ptr.  One load, no byte swap.
        .globl  stdc_load8_aligned_leu64
        .type   stdc_load8_aligned_leu64, @function
stdc_load8_aligned_leu64:
.LFB24:
        .cfi_startproc
        ld      a0,0(a0)                # a0 = 64-bit value at ptr
        ret
        .cfi_endproc
.LFE24:
        .size   stdc_load8_aligned_leu64, .-stdc_load8_aligned_leu64
        .align  1
        # void stdc_store8_aligned_beu8 (uint_least8_t value, unsigned char ptr[1])
        # In: a0 = value, a1 = ptr.  Leaf function, no stack use.
        .globl  stdc_store8_aligned_beu8
        .type   stdc_store8_aligned_beu8, @function
stdc_store8_aligned_beu8:
.LFB33:
        .cfi_startproc
        sb      a0,0(a1)                # ptr[0] = low byte of value
        ret
        .cfi_endproc
.LFE33:
        .size   stdc_store8_aligned_beu8, .-stdc_store8_aligned_beu8
        .align  1
        # void stdc_store8_aligned_beu16 (uint_least16_t value, unsigned char ptr[2])
        # In: a0 = value, a1 = ptr.
        # Swaps the two bytes with shifts, then writes them with one sh.
        .globl  stdc_store8_aligned_beu16
        .type   stdc_store8_aligned_beu16, @function
stdc_store8_aligned_beu16:
.LFB34:
        .cfi_startproc
        slliw   a5,a0,8                 # low byte moved up to bits 8..15
        srliw   a0,a0,8                 # high byte moved down to bits 0..7
        or      a5,a5,a0                # combine the two halves
        sh      a5,0(a1)                # sh stores only the low 16 bits
        ret
        .cfi_endproc
.LFE34:
        .size   stdc_store8_aligned_beu16, .-stdc_store8_aligned_beu16
        .align  1
        # void stdc_store8_aligned_beu32 (uint_least32_t value, unsigned char ptr[4])
        # In: a0 = value, a1 = ptr.
        # Non-leaf: calls __bswapsi2 on value; ptr is kept in callee-saved s0
        # across the call, so both s0 and ra are saved in a 16-byte frame.
        .globl  stdc_store8_aligned_beu32
        .type   stdc_store8_aligned_beu32, @function
stdc_store8_aligned_beu32:
.LFB35:
        .cfi_startproc
        addi    sp,sp,-16               # allocate frame
        .cfi_def_cfa_offset 16
        sd      s0,0(sp)                # save s0 (used to hold ptr)
        sd      ra,8(sp)                # save return address
        .cfi_offset 8, -16
        .cfi_offset 1, -8
        mv      s0,a1                   # s0 = ptr, preserved across the call
        call    __bswapsi2@plt          # a0 = bswap32 (value)
        sw      a0,0(s0)                # store the swapped word with one sw
        ld      ra,8(sp)                # restore return address
        .cfi_restore 1
        ld      s0,0(sp)                # restore s0
        .cfi_restore 8
        addi    sp,sp,16                # release frame
        .cfi_def_cfa_offset 0
        jr      ra
        .cfi_endproc
.LFE35:
        .size   stdc_store8_aligned_beu32, .-stdc_store8_aligned_beu32
        .align  1
        # void stdc_store8_aligned_beu64 (uint_least64_t value, unsigned char ptr[8])
        # In: a0 = value, a1 = ptr.
        # Non-leaf: calls __bswapdi2 on value; ptr is kept in callee-saved s0
        # across the call, so both s0 and ra are saved in a 16-byte frame.
        .globl  stdc_store8_aligned_beu64
        .type   stdc_store8_aligned_beu64, @function
stdc_store8_aligned_beu64:
.LFB36:
        .cfi_startproc
        addi    sp,sp,-16               # allocate frame
        .cfi_def_cfa_offset 16
        sd      s0,0(sp)                # save s0 (used to hold ptr)
        sd      ra,8(sp)                # save return address
        .cfi_offset 8, -16
        .cfi_offset 1, -8
        mv      s0,a1                   # s0 = ptr, preserved across the call
        call    __bswapdi2@plt          # a0 = bswap64 (value)
        sd      a0,0(s0)                # store the swapped doubleword with one sd
        ld      ra,8(sp)                # restore return address
        .cfi_restore 1
        ld      s0,0(sp)                # restore s0
        .cfi_restore 8
        addi    sp,sp,16                # release frame
        .cfi_def_cfa_offset 0
        jr      ra
        .cfi_endproc
.LFE36:
        .size   stdc_store8_aligned_beu64, .-stdc_store8_aligned_beu64
        .align  1
        # void stdc_store8_aligned_leu8 (uint_least8_t value, unsigned char ptr[1])
        # In: a0 = value, a1 = ptr.  Leaf function, no stack use.
        .globl  stdc_store8_aligned_leu8
        .type   stdc_store8_aligned_leu8, @function
stdc_store8_aligned_leu8:
.LFB52:
        .cfi_startproc
        sb      a0,0(a1)                # ptr[0] = low byte of value
        ret
        .cfi_endproc
.LFE52:
        .size   stdc_store8_aligned_leu8, .-stdc_store8_aligned_leu8
        .align  1
        # void stdc_store8_aligned_leu16 (uint_least16_t value, unsigned char ptr[2])
        # In: a0 = value, a1 = ptr.  One store, no byte swap.
        .globl  stdc_store8_aligned_leu16
        .type   stdc_store8_aligned_leu16, @function
stdc_store8_aligned_leu16:
.LFB38:
        .cfi_startproc
        sh      a0,0(a1)                # store the low 16 bits of value at ptr
        ret
        .cfi_endproc
.LFE38:
        .size   stdc_store8_aligned_leu16, .-stdc_store8_aligned_leu16
        .align  1
        # void stdc_store8_aligned_leu32 (uint_least32_t value, unsigned char ptr[4])
        # In: a0 = value, a1 = ptr.  One store, no byte swap.
        .globl  stdc_store8_aligned_leu32
        .type   stdc_store8_aligned_leu32, @function
stdc_store8_aligned_leu32:
.LFB39:
        .cfi_startproc
        sw      a0,0(a1)                # store the low 32 bits of value at ptr
        ret
        .cfi_endproc
.LFE39:
        .size   stdc_store8_aligned_leu32, .-stdc_store8_aligned_leu32
        .align  1
        # void stdc_store8_aligned_leu64 (uint_least64_t value, unsigned char ptr[8])
        # In: a0 = value, a1 = ptr.  One store, no byte swap.
        .globl  stdc_store8_aligned_leu64
        .type   stdc_store8_aligned_leu64, @function
stdc_store8_aligned_leu64:
.LFB40:
        .cfi_startproc
        sd      a0,0(a1)                # store all 64 bits of value at ptr
        ret
        .cfi_endproc
.LFE40:
        .size   stdc_store8_aligned_leu64, .-stdc_store8_aligned_leu64
        .ident  "GCC: (Ubuntu 13.3.0-6ubuntu2~24.04) 13.3.0"
        .section        .note.GNU-stack,"",@progbits
        .file   "loadstore8-paul.c"
        .option pic
        .attribute arch, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0"
        .attribute unaligned_access, 0
        .attribute stack_align, 16
        .text
        .align  1
        # uint_least8_t stdc_load8_aligned_beu8 (const unsigned char ptr[1])
        # In: a0 = ptr.  Out: a0 = the byte at ptr.  Leaf function, no stack use.
        .globl  stdc_load8_aligned_beu8
        .type   stdc_load8_aligned_beu8, @function
stdc_load8_aligned_beu8:
.LFB8:
        .cfi_startproc
        lbu     a0,0(a0)                # a0 = ptr[0], zero-extended
        ret
        .cfi_endproc
.LFE8:
        .size   stdc_load8_aligned_beu8, .-stdc_load8_aligned_beu8
        .align  1
        # uint_least16_t stdc_load8_aligned_beu16 (const unsigned char ptr[2])
        # In: a0 = ptr.  Out: a0 = (ptr[0] << 8) | ptr[1].
        # Two byte loads build ptr[0] | ptr[1] << 8, which is then byte-swapped
        # with shifts.
        .globl  stdc_load8_aligned_beu16
        .type   stdc_load8_aligned_beu16, @function
stdc_load8_aligned_beu16:
.LFB9:
        .cfi_startproc
        lbu     a5,1(a0)                # a5 = ptr[1]
        lbu     a4,0(a0)                # a4 = ptr[0]
        slli    a0,a5,8
        or      a0,a0,a4                # a0 = ptr[1] << 8 | ptr[0]
        slliw   a5,a0,8                 # swap the two bytes:
        srli    a0,a0,8                 #   low byte up, high byte down
        or      a0,a0,a5
        slli    a0,a0,48                # shift pair: keep only the low 16 bits,
        srli    a0,a0,48                # zero-extended
        ret
        .cfi_endproc
.LFE9:
        .size   stdc_load8_aligned_beu16, .-stdc_load8_aligned_beu16
        .align  1
        # uint_least32_t stdc_load8_aligned_beu32 (const unsigned char ptr[4])
        # In: a0 = ptr.  Out: a0 = big-endian word assembled from ptr[0..3].
        # Four separate byte loads, shifted and OR-ed together.
        .globl  stdc_load8_aligned_beu32
        .type   stdc_load8_aligned_beu32, @function
stdc_load8_aligned_beu32:
.LFB10:
        .cfi_startproc
        lbu     a5,0(a0)                # a5 = ptr[0]
        lbu     a3,1(a0)                # a3 = ptr[1]
        lbu     a2,3(a0)                # a2 = ptr[3]
        lbu     a4,2(a0)                # a4 = ptr[2]
        slliw   a0,a5,24                # ptr[0] << 24
        slliw   a5,a3,16                # ptr[1] << 16
        or      a0,a0,a5
        or      a0,a0,a2                # | ptr[3]
        slliw   a5,a4,8                 # ptr[2] << 8
        or      a0,a0,a5
        sext.w  a0,a0                   # sign-extend the 32-bit result
        ret
        .cfi_endproc
.LFE10:
        .size   stdc_load8_aligned_beu32, .-stdc_load8_aligned_beu32
        .align  1
        # uint_least64_t stdc_load8_aligned_beu64 (const unsigned char ptr[8])
        # In: a0 = ptr.  Out: a0 = big-endian doubleword assembled from ptr[0..7].
        # Eight separate byte loads, shifted and OR-ed into a5; the loads are
        # interleaved with the shifts/ORs.
        .globl  stdc_load8_aligned_beu64
        .type   stdc_load8_aligned_beu64, @function
stdc_load8_aligned_beu64:
.LFB11:
        .cfi_startproc
        lbu     a5,0(a0)                # a5 = ptr[0]
        lbu     a4,1(a0)                # a4 = ptr[1]
        lbu     a6,7(a0)                # a6 = ptr[7]
        lbu     a1,2(a0)                # a1 = ptr[2]
        lbu     a2,3(a0)                # a2 = ptr[3]
        slli    a4,a4,48                # ptr[1] << 48
        slli    a5,a5,56                # ptr[0] << 56
        lbu     a3,4(a0)                # a3 = ptr[4]
        or      a5,a5,a4
        or      a5,a5,a6                # | ptr[7]
        lbu     a4,5(a0)                # a4 = ptr[5]
        slli    a1,a1,40                # ptr[2] << 40
        lbu     a0,6(a0)                # a0 = ptr[6] (ptr itself no longer needed)
        or      a5,a5,a1
        slli    a2,a2,32                # ptr[3] << 32
        or      a5,a5,a2
        slli    a3,a3,24                # ptr[4] << 24
        or      a5,a5,a3
        slli    a4,a4,16                # ptr[5] << 16
        or      a5,a5,a4
        slli    a0,a0,8                 # ptr[6] << 8
        or      a0,a5,a0                # final result in a0
        ret
        .cfi_endproc
.LFE11:
        .size   stdc_load8_aligned_beu64, .-stdc_load8_aligned_beu64
        .align  1
        # uint_least8_t stdc_load8_aligned_leu8 (const unsigned char ptr[1])
        # In: a0 = ptr.  Out: a0 = the byte at ptr.  Leaf function, no stack use.
        .globl  stdc_load8_aligned_leu8
        .type   stdc_load8_aligned_leu8, @function
stdc_load8_aligned_leu8:
.LFB33:
        .cfi_startproc
        lbu     a0,0(a0)                # a0 = ptr[0], zero-extended
        ret
        .cfi_endproc
.LFE33:
        .size   stdc_load8_aligned_leu8, .-stdc_load8_aligned_leu8
        .align  1
        # uint_least16_t stdc_load8_aligned_leu16 (const unsigned char ptr[2])
        # In: a0 = ptr.  Out: a0 = ptr[0] | ptr[1] << 8.
        # Two separate byte loads rather than one halfword load.
        .globl  stdc_load8_aligned_leu16
        .type   stdc_load8_aligned_leu16, @function
stdc_load8_aligned_leu16:
.LFB13:
        .cfi_startproc
        lbu     a5,1(a0)                # a5 = ptr[1]
        lbu     a0,0(a0)                # a0 = ptr[0]
        slli    a5,a5,8                 # ptr[1] << 8
        or      a0,a5,a0
        ret
        .cfi_endproc
.LFE13:
        .size   stdc_load8_aligned_leu16, .-stdc_load8_aligned_leu16
        .align  1
        # uint_least32_t stdc_load8_aligned_leu32 (const unsigned char ptr[4])
        # In: a0 = ptr.  Out: a0 = little-endian word assembled from ptr[0..3].
        # Four separate byte loads rather than one word load.
        .globl  stdc_load8_aligned_leu32
        .type   stdc_load8_aligned_leu32, @function
stdc_load8_aligned_leu32:
.LFB14:
        .cfi_startproc
        lbu     a4,1(a0)                # a4 = ptr[1]
        lbu     a3,0(a0)                # a3 = ptr[0]
        lbu     a5,2(a0)                # a5 = ptr[2]
        lbu     a0,3(a0)                # a0 = ptr[3]
        slli    a4,a4,8                 # ptr[1] << 8
        or      a4,a4,a3                # | ptr[0]
        slli    a5,a5,16                # ptr[2] << 16
        or      a5,a5,a4
        slli    a0,a0,24                # ptr[3] << 24
        or      a0,a0,a5
        sext.w  a0,a0                   # sign-extend the 32-bit result
        ret
        .cfi_endproc
.LFE14:
        .size   stdc_load8_aligned_leu32, .-stdc_load8_aligned_leu32
        .align  1
        # uint_least64_t stdc_load8_aligned_leu64 (const unsigned char ptr[8])
        # In: a0 = ptr.  Out: a0 = little-endian doubleword assembled from ptr[0..7].
        # Eight separate byte loads, each shifted into place and OR-ed onto the
        # running result.
        .globl  stdc_load8_aligned_leu64
        .type   stdc_load8_aligned_leu64, @function
stdc_load8_aligned_leu64:
.LFB15:
        .cfi_startproc
        lbu     a6,1(a0)                # a6 = ptr[1]
        lbu     a5,0(a0)                # a5 = ptr[0]
        lbu     a1,2(a0)                # a1 = ptr[2]
        lbu     a2,3(a0)                # a2 = ptr[3]
        lbu     a3,4(a0)                # a3 = ptr[4]
        slli    a6,a6,8                 # ptr[1] << 8
        lbu     a4,5(a0)                # a4 = ptr[5]
        or      a6,a6,a5                # | ptr[0]
        slli    a1,a1,16                # ptr[2] << 16
        lbu     a5,6(a0)                # a5 = ptr[6]
        or      a1,a1,a6
        slli    a2,a2,24                # ptr[3] << 24
        lbu     a0,7(a0)                # a0 = ptr[7] (ptr itself no longer needed)
        or      a2,a2,a1
        slli    a3,a3,32                # ptr[4] << 32
        or      a3,a3,a2
        slli    a4,a4,40                # ptr[5] << 40
        or      a4,a4,a3
        slli    a5,a5,48                # ptr[6] << 48
        or      a5,a5,a4
        slli    a0,a0,56                # ptr[7] << 56
        or      a0,a0,a5                # final result in a0
        ret
        .cfi_endproc
.LFE15:
        .size   stdc_load8_aligned_leu64, .-stdc_load8_aligned_leu64
        .align  1
        # void stdc_store8_aligned_beu8 (uint_least8_t value, unsigned char ptr[1])
        # In: a0 = value, a1 = ptr.  Leaf function, no stack use.
        .globl  stdc_store8_aligned_beu8
        .type   stdc_store8_aligned_beu8, @function
stdc_store8_aligned_beu8:
.LFB24:
        .cfi_startproc
        sb      a0,0(a1)                # ptr[0] = low byte of value
        ret
        .cfi_endproc
.LFE24:
        .size   stdc_store8_aligned_beu8, .-stdc_store8_aligned_beu8
        .align  1
        # void stdc_store8_aligned_beu16 (uint_least16_t value, unsigned char ptr[2])
        # In: a0 = value, a1 = ptr.  Two separate byte stores, high byte first.
        .globl  stdc_store8_aligned_beu16
        .type   stdc_store8_aligned_beu16, @function
stdc_store8_aligned_beu16:
.LFB25:
        .cfi_startproc
        srliw   a5,a0,8                 # a5 = value >> 8
        sb      a5,0(a1)                # ptr[0] = high byte
        sb      a0,1(a1)                # ptr[1] = low byte
        ret
        .cfi_endproc
.LFE25:
        .size   stdc_store8_aligned_beu16, .-stdc_store8_aligned_beu16
        .align  1
        # void stdc_store8_aligned_beu32 (uint_least32_t value, unsigned char ptr[4])
        # In: a0 = value, a1 = ptr.  Four separate byte stores, high byte first.
        .globl  stdc_store8_aligned_beu32
        .type   stdc_store8_aligned_beu32, @function
stdc_store8_aligned_beu32:
.LFB26:
        .cfi_startproc
        srliw   a3,a0,24                # a3 = value >> 24
        srliw   a4,a0,16                # a4 = value >> 16
        srliw   a5,a0,8                 # a5 = value >> 8
        sb      a3,0(a1)                # ptr[0] = bits 24..31
        sb      a4,1(a1)                # ptr[1] = bits 16..23
        sb      a5,2(a1)                # ptr[2] = bits 8..15
        sb      a0,3(a1)                # ptr[3] = bits 0..7
        ret
        .cfi_endproc
.LFE26:
        .size   stdc_store8_aligned_beu32, .-stdc_store8_aligned_beu32
        .align  1
        # void stdc_store8_aligned_beu64 (uint_least64_t value, unsigned char ptr[8])
        # In: a0 = value, a1 = ptr.  Eight separate byte stores, high byte first.
        .globl  stdc_store8_aligned_beu64
        .type   stdc_store8_aligned_beu64, @function
stdc_store8_aligned_beu64:
.LFB27:
        .cfi_startproc
        srli    t1,a0,56                # t1 = value >> 56
        srli    a7,a0,48                # a7 = value >> 48
        srli    a6,a0,40                # a6 = value >> 40
        srli    a2,a0,32                # a2 = value >> 32
        srli    a3,a0,24                # a3 = value >> 24
        srli    a4,a0,16                # a4 = value >> 16
        srli    a5,a0,8                 # a5 = value >> 8
        sb      t1,0(a1)                # ptr[0] = most significant byte
        sb      a7,1(a1)
        sb      a6,2(a1)
        sb      a2,3(a1)
        sb      a3,4(a1)
        sb      a4,5(a1)
        sb      a5,6(a1)
        sb      a0,7(a1)                # ptr[7] = least significant byte
        ret
        .cfi_endproc
.LFE27:
        .size   stdc_store8_aligned_beu64, .-stdc_store8_aligned_beu64
        .align  1
        # void stdc_store8_aligned_leu8 (uint_least8_t value, unsigned char ptr[1])
        # In: a0 = value, a1 = ptr.  Leaf function, no stack use.
        .globl  stdc_store8_aligned_leu8
        .type   stdc_store8_aligned_leu8, @function
stdc_store8_aligned_leu8:
.LFB35:
        .cfi_startproc
        sb      a0,0(a1)                # ptr[0] = low byte of value
        ret
        .cfi_endproc
.LFE35:
        .size   stdc_store8_aligned_leu8, .-stdc_store8_aligned_leu8
        .align  1
        # void stdc_store8_aligned_leu16 (uint_least16_t value, unsigned char ptr[2])
        # In: a0 = value, a1 = ptr.  Two separate byte stores, low byte first.
        .globl  stdc_store8_aligned_leu16
        .type   stdc_store8_aligned_leu16, @function
stdc_store8_aligned_leu16:
.LFB29:
        .cfi_startproc
        srliw   a5,a0,8                 # a5 = value >> 8
        sb      a0,0(a1)                # ptr[0] = low byte
        sb      a5,1(a1)                # ptr[1] = high byte
        ret
        .cfi_endproc
.LFE29:
        .size   stdc_store8_aligned_leu16, .-stdc_store8_aligned_leu16
        .align  1
        # void stdc_store8_aligned_leu32 (uint_least32_t value, unsigned char ptr[4])
        # In: a0 = value, a1 = ptr.  Four separate byte stores, low byte first.
        .globl  stdc_store8_aligned_leu32
        .type   stdc_store8_aligned_leu32, @function
stdc_store8_aligned_leu32:
.LFB30:
        .cfi_startproc
        srliw   a3,a0,8                 # a3 = value >> 8
        srliw   a4,a0,16                # a4 = value >> 16
        srliw   a5,a0,24                # a5 = value >> 24
        sb      a0,0(a1)                # ptr[0] = bits 0..7
        sb      a3,1(a1)                # ptr[1] = bits 8..15
        sb      a4,2(a1)                # ptr[2] = bits 16..23
        sb      a5,3(a1)                # ptr[3] = bits 24..31
        ret
        .cfi_endproc
.LFE30:
        .size   stdc_store8_aligned_leu32, .-stdc_store8_aligned_leu32
        .align  1
        # void stdc_store8_aligned_leu64 (uint_least64_t value, unsigned char ptr[8])
        # In: a0 = value, a1 = ptr.  Eight separate byte stores, low byte first.
        .globl  stdc_store8_aligned_leu64
        .type   stdc_store8_aligned_leu64, @function
stdc_store8_aligned_leu64:
.LFB31:
        .cfi_startproc
        srli    t1,a0,8                 # t1 = value >> 8
        srli    a7,a0,16                # a7 = value >> 16
        srli    a6,a0,24                # a6 = value >> 24
        srli    a2,a0,32                # a2 = value >> 32
        srli    a3,a0,40                # a3 = value >> 40
        srli    a4,a0,48                # a4 = value >> 48
        srli    a5,a0,56                # a5 = value >> 56
        sb      a0,0(a1)                # ptr[0] = least significant byte
        sb      t1,1(a1)
        sb      a7,2(a1)
        sb      a6,3(a1)
        sb      a2,4(a1)
        sb      a3,5(a1)
        sb      a4,6(a1)
        sb      a5,7(a1)                # ptr[7] = most significant byte
        ret
        .cfi_endproc
.LFE31:
        .size   stdc_store8_aligned_leu64, .-stdc_store8_aligned_leu64
        .ident  "GCC: (Ubuntu 13.3.0-6ubuntu2~24.04) 13.3.0"
        .section        .note.GNU-stack,"",@progbits

Reply via email to