On 6/16/26 1:17 AM, Richard Henderson wrote:
On 6/15/26 03:01, Gavin Shan wrote:
+void qemu_ram_copy(void *dest, const void *src, size_t n)
+{
+ if (HOST_UNALIGNED_MMIO_OK) {
+ switch (n) {
+ case 1:
+ __builtin_memcpy(dest, src, 1);
+ break;
+ case 2:
+ __builtin_memcpy(dest, src, 2);
+ break;
+ case 4:
+ __builtin_memcpy(dest, src, 4);
+ break;
+ case 8:
+ __builtin_memcpy(dest, src, 8);
+ break;
+ default:
+ memcpy(dest, src, n);
+ }
+ } else {
+ uintptr_t test, lsb;
+
+ do {
+ test = (uintptr_t)dest | n;
+ lsb = test & -test;
+ switch (lsb) {
Either assert n != 0 at the start, or use a while loop instead of do/while,
because the body of the loop won't handle n == 0 correctly.
I will change this to "while (n > 0)". Since n is a size_t it can never be
negative, so this is equivalent to "while (n != 0)".
+ case 1:
+ *(uint8_t *)dest = *(uint8_t *)src;
+ src += 1;
+ dest += 1;
+ n -= 1;
+ break;
+ case 2:
+ *(uint16_t *)dest = *(uint16_t *)src;
+ src += 2;
+ dest += 2;
+ n -= 2;
+ break;
+ case 4:
+ *(uint32_t *)dest = *(uint32_t *)src;
+ src += 4;
+ dest += 4;
+ n -= 4;
+ break;
+ default:
+ *(uint64_t *)dest = *(uint64_t *)src;
+ src += 8;
+ dest += 8;
+ n -= 8;
Use qatomic_set for the stores.
Could you confirm which stores need qatomic_set()? There are two sets
of stores, as shown below. I guess you're asking me to use qatomic_set() for (a)?
Could you explain a bit why qatomic_set() is needed?
// (a)
*(uint64_t *)dest = *(uint64_t *)src;
// (b)
src += 8;
dest += 8;
n -= 8;
src is not guaranteed to be aligned (only dest and n are checked), so except for case 1, you need ld{uw,l,q}_he_p.
Yeah, I realized this after the patch was sent. ldub_p() will be
used for case 1 as well.
Another, unrelated question: why is an 'int' value returned from ldub_p()
and ld{uw, l}_he_p() in bswap.h? Shouldn't they return 'uint{8, 16, 32}_t'
values?
static inline int ldub_p(const void *ptr)
static inline int lduw_he_p(const void *ptr)
static inline int ldl_he_p(const void *ptr)
static inline uint64_t ldq_he_p(const void *ptr) // return uint64_t
+void qemu_ram_move(void *dest, const void *src, size_t n)
+{
+ if (HOST_UNALIGNED_MMIO_OK) {
+ switch (n) {
+ case 1:
+ __builtin_memmove(dest, src, 1);
+ break;
+ case 2:
+ __builtin_memmove(dest, src, 2);
+ break;
+ case 4:
+ __builtin_memmove(dest, src, 4);
+ break;
+ case 8:
+ __builtin_memmove(dest, src, 8);
+ break;
+ default:
+ memmove(dest, src, n);
+ }
+ } else {
+ qemu_ram_copy(dest, src, n);
+ }
+}
The qemu_ram_copy implementation above does not work with overlapping blocks.
Yeah, it's something I need to figure out for the next revision. Thanks for your
review and comments.
r~
Thanks,
Gavin