On 1/10/26 00:23, Richard Henderson wrote:
I'm not fond of the pointer arithmetic or the code structure.
Perhaps better as
switch (mop & (MO_BSWAP | MO_SIZE)) {
case MO_LEUW:
return lduw_le_p(ptr);
case MO_BEUW:
return lduw_be_p(ptr);
...
default:
g_assert_not_reached();
}
which would hopefully compile to host endian-swapping load insns like
.L1:
mov (ptr), %eax
ret
.L2:
movbe (ptr), %eax
ret
It might only do so for 32 bits, because movbe also bundles a free
32->64-bit zero extension, but not for the smaller sizes. Speaking of
which, I think ldm_p also needs to handle MO_SIGN? It can all be done
in one with
/*
 * ldm_p: load a value whose size is encoded in @mop from @ptr and widen
 * it to 64 bits.
 *
 * @ptr: source address; accessed through memcpy, so it need not be aligned.
 * @mop: access size (memop_size(mop) bytes, at most 8), optionally
 *       combined with MO_BSWAP (swap bytes relative to host order) and
 *       MO_SIGN (sign-extend instead of zero-extend).
 *
 * Returns the loaded value, zero- or sign-extended to 64 bits.
 */
static inline uint64_t ldm_p(const void *ptr, MemOp mop)
{
    const unsigned size = memop_size(mop);
    /*
     * Must start out as zero: only @size bytes are overwritten below and
     * the plain (no swap, no sign) path returns val as-is.
     */
    uint64_t val = 0;
    uint8_t *pval = (uint8_t *)&val;
    unsigned shift;

    /* Validate before doing any pointer arithmetic based on size. */
    assert(size <= sizeof(val));

    /* Place the bytes in the low-order end of val for either host order. */
    if (HOST_BIG_ENDIAN) {
        pval += sizeof(val) - size;
    }
    __builtin_memcpy(pval, ptr, size);

    shift = 64 - 8 * size;
    if (mop & MO_BSWAP) {
        /* The 64-bit swap moves the value to the top, bytes reversed. */
        val = __builtin_bswap64(val);
    } else if (mop & MO_SIGN) {
        /* Move the value to the top so its sign bit becomes bit 63. */
        val <<= shift;
    } else {
        return val;
    }

    /* Bring the value back down, extending as requested. */
    if (mop & MO_SIGN) {
        /* Relies on arithmetic right shift of negative values (GCC/clang). */
        return (uint64_t)((int64_t)val >> shift);
    }
    return val >> shift;
}
/*
 * stm_p: store the low memop_size(mop) bytes of @val to @ptr.
 *
 * @ptr: destination address; written through memcpy, so it need not be
 *       aligned.
 * @val: value to store; only its low-order bytes are written.
 * @mop: access size (at most 8 bytes), optionally combined with MO_BSWAP
 *       to store the bytes in the opposite of host order. Other flags
 *       are not examined.
 */
static inline void stm_p(void *ptr, uint64_t val, MemOp mop)
{
    const unsigned size = memop_size(mop);
    const uint8_t *src = (const uint8_t *)&val;

    /* Validate before doing any shift or pointer arithmetic based on size. */
    assert(size <= sizeof(val));

    if (mop & MO_BSWAP) {
        /*
         * Swap all 64 bits, then shift the reversed low-order bytes back
         * down so they are again the low-order bytes of val.
         */
        val = __builtin_bswap64(val) >> (64 - size * 8);
    }

    /* The low-order bytes sit at the high-address end on big-endian hosts. */
    if (HOST_BIG_ENDIAN) {
        src += sizeof(val) - size;
    }
    __builtin_memcpy(ptr, src, size);
}
When inlining ldm_p, GCC is able to generate movzx/movsx instructions but
doesn't recognize bswap64 + right shift as a smaller-width bswap + zero
extension; clang does. Neither is able to generate movbe instructions,
though.
I attach a standalone file I played with.
Paolo
#include <stdint.h>
#include <assert.h>
/*
 * Memory operation descriptor: a size field in the low bits plus
 * independent flag bits.
 */
typedef enum MemOp {
    /* Size field: log2 of the access size in bytes (see memop_size()). */
    MO_16    = 0x01,
    MO_32    = 0x02,
    MO_SIZE  = 0x07,    /* mask covering the size field */

    /* Flag bits, disjoint from the size field. */
    MO_SIGN  = 1 << 3,  /* sign-extend the loaded value */
    MO_BSWAP = 1 << 4,  /* swap bytes relative to host order */
} MemOp;
extern void __attribute__((noreturn)) g_assert_not_reached(void);
/*
 * memop_size: return the access size in bytes encoded in @x.
 *
 * The MO_SIZE field of @x holds the base-2 logarithm of the size; any
 * other bits in @x are ignored.
 */
static inline int memop_size(MemOp x)
{
    const unsigned log2_bytes = x & MO_SIZE;

    return 1 << log2_bytes;
}
/*
 * ldm_p: load a value whose size is encoded in @mop from @ptr and widen
 * it to 64 bits.
 *
 * @ptr: source address; accessed through memcpy, so it need not be aligned.
 * @mop: access size (memop_size(mop) bytes, at most 8), optionally
 *       combined with MO_BSWAP (swap bytes relative to host order) and
 *       MO_SIGN (sign-extend instead of zero-extend).
 *
 * Returns the loaded value, zero- or sign-extended to 64 bits.
 *
 * NOTE: the bytes are copied to the lowest addresses of the 64-bit
 * temporary with no host-endianness adjustment, which matches the
 * low-order bytes only on a little-endian host.
 */
static inline uint64_t ldm_p(const void *ptr, MemOp mop)
{
    const unsigned size = memop_size(mop);
    /*
     * Must start out as zero: only @size bytes are overwritten below and
     * the plain (no swap, no sign) path returns val as-is.
     */
    uint64_t val = 0;
    unsigned shift;

    assert(size <= sizeof(val));
    __builtin_memcpy(&val, ptr, size);

    shift = 64 - 8 * size;
    if (mop & MO_BSWAP) {
        /* The 64-bit swap moves the value to the top, bytes reversed. */
        val = __builtin_bswap64(val);
    } else if (mop & MO_SIGN) {
        /* Move the value to the top so its sign bit becomes bit 63. */
        val <<= shift;
    } else {
        return val;
    }

    /* Bring the value back down, extending as requested. */
    if (mop & MO_SIGN) {
        /* Relies on arithmetic right shift of negative values (GCC/clang). */
        return (uint64_t)((int64_t)val >> shift);
    }
    return val >> shift;
}
/*
 * stm_p: store the low memop_size(mop) bytes of @val to @ptr.
 *
 * @ptr: destination address; written through memcpy, so it need not be
 *       aligned.
 * @val: value to store; only its low-order bytes are written.
 * @mop: access size (at most 8 bytes), optionally combined with MO_BSWAP
 *       to store the bytes in the opposite of host order. Other flags
 *       are not examined.
 *
 * NOTE: the bytes are taken from the lowest addresses of @val with no
 * host-endianness adjustment, which matches the low-order bytes only on
 * a little-endian host.
 */
static inline void stm_p(void *ptr, uint64_t val, MemOp mop)
{
    const unsigned size = memop_size(mop);

    /* Validate before using size in a shift count. */
    assert(size <= sizeof(val));

    if (mop & MO_BSWAP) {
        /*
         * Swap all 64 bits, then shift the reversed low-order bytes back
         * down so they are again the low-order bytes of val.
         */
        val = __builtin_bswap64(val) >> (64 - size * 8);
    }
    __builtin_memcpy(ptr, &val, size);
}
uint32_t lduw_be_p(const void *ptr)
{
return ldm_p(ptr, MO_16 | MO_BSWAP);
}
uint32_t ldsw_le_p(const void *ptr)
{
return ldm_p(ptr, MO_16 | MO_SIGN);
}
uint32_t ldsl_be_p(const void *ptr)
{
return ldm_p(ptr, MO_32 | MO_BSWAP | MO_SIGN);
}
/*
 * stl_be_p: store the 32-bit @val to @ptr with its bytes swapped relative
 * to host order.
 *
 * MO_SIGN is deliberately not passed: sign extension has no meaning for
 * a store, and stm_p() only looks at the size field and MO_BSWAP.
 * NOTE(review): MO_BSWAP means "opposite of host order", so this is a
 * big-endian store only when built for a little-endian host.
 */
void stl_be_p(void *ptr, uint32_t val)
{
    stm_p(ptr, val, MO_32 | MO_BSWAP);
}