On 6/16/26 1:17 AM, Richard Henderson wrote:
On 6/15/26 03:01, Gavin Shan wrote:
+void qemu_ram_copy(void *dest, const void *src, size_t n)
+{
+    if (HOST_UNALIGNED_MMIO_OK) {
+        switch (n) {
+        case 1:
+            __builtin_memcpy(dest, src, 1);
+            break;
+        case 2:
+            __builtin_memcpy(dest, src, 2);
+            break;
+        case 4:
+            __builtin_memcpy(dest, src, 4);
+            break;
+        case 8:
+            __builtin_memcpy(dest, src, 8);
+            break;
+        default:
+            memcpy(dest, src, n);
+        }
+    } else {
+        uintptr_t test, lsb;
+
+        do {
+            test = (uintptr_t)dest | n;
+            lsb = test & -test;
+            switch (lsb) {

Either assert n != 0 to start, or use while not do/while.
Because the body of the loop won't handle n == 0 correctly.


I will change this to "while (n > 0)" since we're not expecting
"n < 0" either.

+            case 1:
+                *(uint8_t *)dest = *(uint8_t *)src;
+                src += 1;
+                dest += 1;
+                n -= 1;
+                break;
+            case 2:
+                *(uint16_t *)dest = *(uint16_t *)src;
+                src += 2;
+                dest += 2;
+                n -= 2;
+                break;
+            case 4:
+                *(uint32_t *)dest = *(uint32_t *)src;
+                src += 4;
+                dest += 4;
+                n -= 4;
+                break;
+            default:
+                *(uint64_t *)dest = *(uint64_t *)src;
+                src += 8;
+                dest += 8;
+                n -= 8;

Use qatomic_set for the stores.


Could you confirm which stores need qatomic_set()? There are two sets
of stores as below. I guess you're asking have qatomic_set() for (a)?
Could you explain a bit why qatomic_set() is needed?

    // (a)
    *(uint64_t *)dest = *(uint64_t *)src;


    // (b)
    src += 8;
    dest += 8;
    n -= 8;

src is not aligned, so except for case 1, you need ld{uw,l,q}_he_p.


Yeah, I realized this after this patch was sent. ldub_p() will be
used for case 1 either.

Another unrelated question: why 'int' value is returned from ldub_p()
and ld{uw, l}_he_p() in bswap.h? They would return 'uint{8, 16, 32}_t'
values?

static inline int ldub_p(const void *ptr)
static inline int lduw_he_p(const void *ptr)
static inline int ldl_he_p(const void *ptr)
static inline uint64_t ldq_he_p(const void *ptr)   // return uint64_t

+void qemu_ram_move(void *dest, const void *src, size_t n)
+{
+    if (HOST_UNALIGNED_MMIO_OK) {
+        switch (n) {
+        case 1:
+            __builtin_memmove(dest, src, 1);
+            break;
+        case 2:
+            __builtin_memmove(dest, src, 2);
+            break;
+        case 4:
+            __builtin_memmove(dest, src, 4);
+            break;
+        case 8:
+            __builtin_memmove(dest, src, 8);
+            break;
+        default:
+            memmove(dest, src, n);
+        }
+    } else {
+        qemu_ram_copy(dest, src, n);
+    }
+}

The qemu_ram_copy implementation above does not work with overlapping blocks.


Yeah, it's something I need to figure out for next revision. Thanks for your
review and comments.


r~


Thanks,
Gavin


Reply via email to