[Qemu-devel] [PATCH v5 11/15] memory: Single byte swap along the I/O path

2019-07-25 Thread tony.nguyen
Now that MemOp has been pushed down into the memory API, we can
collapse the two byte swaps adjust_endianness and handle_bswap into
the former.

Collapsing byte swaps along the I/O path enables additional endian
inversion logic, e.g. SPARC64 Invert Endian TTE bit, with redundant
byte swaps cancelling out.

Signed-off-by: Tony Nguyen <tony.nguyen@bt.com>
---
 accel/tcg/cputlb.c | 41 +++--
 memory.c   | 30 +-
 2 files changed, 36 insertions(+), 35 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 5d88cec..e61b1eb 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1209,26 +1209,13 @@ static void *atomic_mmu_lookup(CPUArchState *env, 
target_ulong addr,
 #endif

 /*
- * Byte Swap Helper
+ * Byte Swap Checker
  *
- * This should all dead code away depending on the build host and
- * access type.
+ * Dead code should all go away depending on the build host and access type.
  */
-
-static inline uint64_t handle_bswap(uint64_t val, int size, bool big_endian)
+static inline bool need_bswap(bool big_endian)
 {
-if ((big_endian && NEED_BE_BSWAP) || (!big_endian && NEED_LE_BSWAP)) {
-switch (size) {
-case 1: return val;
-case 2: return bswap16(val);
-case 4: return bswap32(val);
-case 8: return bswap64(val);
-default:
-g_assert_not_reached();
-}
-} else {
-return val;
-}
+return (big_endian && NEED_BE_BSWAP) || (!big_endian && NEED_LE_BSWAP);
 }

 /*
@@ -1259,6 +1246,7 @@ load_helper(CPUArchState *env, target_ulong addr, 
TCGMemOpIdx oi,
 unsigned a_bits = get_alignment_bits(get_memop(oi));
 void *haddr;
 uint64_t res;
+MemOp op;

 /* Handle CPU specific unaligned behaviour */
 if (addr & ((1 << a_bits) - 1)) {
@@ -1304,9 +1292,13 @@ load_helper(CPUArchState *env, target_ulong addr, 
TCGMemOpIdx oi,
 }
 }

-res = io_readx(env, &env_tlb(env)->d[mmu_idx].iotlb[index],
-   mmu_idx, addr, retaddr, access_type, SIZE_MEMOP(size));
-return handle_bswap(res, size, big_endian);
+op = SIZE_MEMOP(size);
+if (need_bswap(big_endian)) {
+op ^= MO_BSWAP;
+}
+
+return io_readx(env, &env_tlb(env)->d[mmu_idx].iotlb[index],
+   mmu_idx, addr, retaddr, access_type, op);
 }

 /* Handle slow unaligned access (it spans two pages or IO).  */
@@ -1507,6 +1499,7 @@ store_helper(CPUArchState *env, target_ulong addr, 
uint64_t val,
 const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
 unsigned a_bits = get_alignment_bits(get_memop(oi));
 void *haddr;
+MemOp op;

 /* Handle CPU specific unaligned behaviour */
 if (addr & ((1 << a_bits) - 1)) {
@@ -1552,9 +1545,13 @@ store_helper(CPUArchState *env, target_ulong addr, 
uint64_t val,
 }
 }

+op = SIZE_MEMOP(size);
+if (need_bswap(big_endian)) {
+op ^= MO_BSWAP;
+}
+
 io_writex(env, &env_tlb(env)->d[mmu_idx].iotlb[index], mmu_idx,
-  handle_bswap(val, size, big_endian),
-  addr, retaddr, SIZE_MEMOP(size));
+  val, addr, retaddr, op);
 return;
 }

diff --git a/memory.c b/memory.c
index 6982e19..0277d3d 100644
--- a/memory.c
+++ b/memory.c
@@ -352,7 +352,7 @@ static bool memory_region_big_endian(MemoryRegion *mr)
 #endif
 }

-static bool memory_region_wrong_endianness(MemoryRegion *mr)
+static bool memory_region_endianness_inverted(MemoryRegion *mr)
 {
 #ifdef TARGET_WORDS_BIGENDIAN
 return mr->ops->endianness == DEVICE_LITTLE_ENDIAN;
@@ -361,23 +361,27 @@ static bool memory_region_wrong_endianness(MemoryRegion 
*mr)
 #endif
 }

-static void adjust_endianness(MemoryRegion *mr, uint64_t *data, unsigned size)
+static void adjust_endianness(MemoryRegion *mr, uint64_t *data, MemOp op)
 {
-if (memory_region_wrong_endianness(mr)) {
-switch (size) {
-case 1:
+if (memory_region_endianness_inverted(mr)) {
+op ^= MO_BSWAP;
+}
+
+if (op & MO_BSWAP) {
+switch (op & MO_SIZE) {
+case MO_8:
 break;
-case 2:
+case MO_16:
 *data = bswap16(*data);
 break;
-case 4:
+case MO_32:
 *data = bswap32(*data);
 break;
-case 8:
+case MO_64:
 *data = bswap64(*data);
 break;
 default:
-abort();
+g_assert_not_reached();
 }
 }
 }
@@ -1451,7 +1455,7 @@ MemTxResult memory_region_dispatch_read(MemoryRegion *mr,
 }

 r = memory_region_dispatch_read1(mr, addr, pval, size, attrs);
-adjust_endianness(mr, pval, size);
+adjust_endianness(mr, pval, op);
 return r;
 }

@@ -1494,7 +1498,7 @@ MemTxResult memory_region_dispatch_write(MemoryRegion *mr,
 return MEMTX_DECODE_ERROR;
 }

-adjust_endianness(mr, &data, size);
+adjust_endianness(mr, &data, op);

Re: [Qemu-devel] [PATCH v5 11/15] memory: Single byte swap along the I/O path

2019-07-26 Thread Paolo Bonzini
On 26/07/19 08:47, tony.ngu...@bt.com wrote:
> +        op = SIZE_MEMOP(size);
> +        if (need_bswap(big_endian)) {
> +            op ^= MO_BSWAP;
> +        }

And this has the same issue as the first version.  It should be

op = SIZE_MEMOP(size) | (big_endian ? MO_BE : MO_LE);

and everything should work.  If it doesn't (and indeed it doesn't :)) it
means you have bugs somewhere else.

Paolo



Re: [Qemu-devel] [PATCH v5 11/15] memory: Single byte swap along the I/O path

2019-07-26 Thread Paolo Bonzini
On 26/07/19 08:47, tony.ngu...@bt.com wrote:
> +static bool memory_region_endianness_inverted(MemoryRegion *mr)
>  {
>  #ifdef TARGET_WORDS_BIGENDIAN
>      return mr->ops->endianness == DEVICE_LITTLE_ENDIAN;
> @@ -361,23 +361,27 @@ static bool
> memory_region_wrong_endianness(MemoryRegion *mr)
>  #endif
>  }
>  
> -static void adjust_endianness(MemoryRegion *mr, uint64_t *data,
> unsigned size)
> +static void adjust_endianness(MemoryRegion *mr, uint64_t *data, MemOp op)
>  {
> -    if (memory_region_wrong_endianness(mr)) {
> -        switch (size) {
> -        case 1:
> +    if (memory_region_endianness_inverted(mr)) {
> +        op ^= MO_BSWAP;
> +    }

Here it should not matter: the caller of memory_region_dispatch_read
should include one of MO_TE/MO_LE/MO_BE in the op (or nothing for host
endianness).  Then memory_region_endianness_inverted can be:

  if (mr->ops->endianness == DEVICE_NATIVE_ENDIAN)
return (op & MO_BSWAP) != MO_TE;
  else if (mr->ops->endianness == DEVICE_BIG_ENDIAN)
return (op & MO_BSWAP) != MO_BE;
  else if (mr->ops->endianness == DEVICE_LITTLE_ENDIAN)
return (op & MO_BSWAP) != MO_LE;

and adjust_endianness does

  if (memory_region_endianness_inverted(mr, op)) {
switch (op & MO_SIZE) {
  ...
}
  }

I think the changes should be split in two parts.  Before this patch,
you modify all callers of memory_region_dispatch_* so that they already
pass the right endianness op; however, you leave the existing swap in
place.  So for example in load_helper you'd have in a previous patch

+/* FIXME: io_readx ignores MO_BSWAP.  */
+op = SIZE_MEMOP(size) | (big_endian ? MO_BE : MO_LE);
 res = io_readx(env, &env_tlb(env)->d[mmu_idx].iotlb[index],
-   mmu_idx, addr, retaddr, access_type,
SIZE_MEMOP(size));
+   mmu_idx, addr, retaddr, access_type, op);
 return handle_bswap(res, size, big_endian);

Then, in this patch, you remove the handle_bswap call as well as the
FIXME comment.

Paolo



Re: [Qemu-devel] [PATCH v5 11/15] memory: Single byte swap along the I/O path

2019-07-26 Thread Richard Henderson
On 7/26/19 2:26 AM, Paolo Bonzini wrote:
> On 26/07/19 08:47, tony.ngu...@bt.com wrote:
>> +        op = SIZE_MEMOP(size);
>> +        if (need_bswap(big_endian)) {
>> +            op ^= MO_BSWAP;
>> +        }
> 
> And this has the same issue as the first version.  It should be
> 
>   op = SIZE_MEMOP(size) | (big_endian ? MO_BE : MO_LE);
> 
> and everything should work.  If it doesn't (and indeed it doesn't :)) it
> means you have bugs somewhere else.

As I mentioned against patch 9, which also touches this area, it should be
using the MemOp that is already passed in to this function instead of building
a new one from scratch.

But, yes, any failure in that would mean bugs somewhere else.  ;-)


r~



Re: [Qemu-devel] [PATCH v5 11/15] memory: Single byte swap along the I/O path

2019-07-26 Thread Richard Henderson
On 7/26/19 2:39 AM, Paolo Bonzini wrote:
> Then memory_region_endianness_inverted can be:
> 
>   if (mr->ops->endianness == DEVICE_NATIVE_ENDIAN)
> return (op & MO_BSWAP) != MO_TE;
>   else if (mr->ops->endianness == DEVICE_BIG_ENDIAN)
> return (op & MO_BSWAP) != MO_BE;
>   else if (mr->ops->endianness == DEVICE_LITTLE_ENDIAN)
> return (op & MO_BSWAP) != MO_LE;

Possibly outside the scope of this patch set, I'd like to replace enum
device_endian with MemOp, with exactly the above enumerator replacement.

In the meantime, perhaps a conversion function

static MemOp devendian_memop(enum device_endian d)
{
switch (d) {
case DEVICE_NATIVE_ENDIAN:
return MO_TE;
case DEVICE_BIG_ENDIAN:
return MO_BE;
case DEVICE_LITTLE_ENDIAN:
return MO_LE;
default:
g_assert_not_reached();
}
}

With that, this would simplify to

return (op & MO_BSWAP) != devendian_memop(mr->ops->endianness);


> I think the changes should be split in two parts.  Before this patch,
> you modify all callers of memory_region_dispatch_* so that they already
> pass the right endianness op; however, you leave the existing swap in
> place.  So for example in load_helper you'd have in a previous patch
> 
> +/* FIXME: io_readx ignores MO_BSWAP.  */
> +op = SIZE_MEMOP(size) | (big_endian ? MO_BE : MO_LE);
>  res = io_readx(env, &env_tlb(env)->d[mmu_idx].iotlb[index],
> -   mmu_idx, addr, retaddr, access_type,
> SIZE_MEMOP(size));
> +   mmu_idx, addr, retaddr, access_type, op);
>  return handle_bswap(res, size, big_endian);
> 
> Then, in this patch, you remove the handle_bswap call as well as the
> FIXME comment.

Agreed.


r~