On Mon, Jun 15, 2026 at 12:36:00PM -0400, Michael S. Tsirkin wrote:
> On Mon, Jun 15, 2026 at 08:17:14AM -0700, Richard Henderson wrote:
> > On 6/15/26 03:01, Gavin Shan wrote:
> > > +void qemu_ram_copy(void *dest, const void *src, size_t n)
> > > +{
> > > +    if (HOST_UNALIGNED_MMIO_OK) {
> > > +        switch (n) {
> > > +        case 1:
> > > +            __builtin_memcpy(dest, src, 1);
> > > +            break;
> > > +        case 2:
> > > +            __builtin_memcpy(dest, src, 2);
> > > +            break;
> > > +        case 4:
> > > +            __builtin_memcpy(dest, src, 4);
> > > +            break;
> > > +        case 8:
> > > +            __builtin_memcpy(dest, src, 8);
> > > +            break;
> > > +        default:
> > > +            memcpy(dest, src, n);
> > > +        }
> > > +    } else {
> > > +        uintptr_t test, lsb;
> > > +
> > > +        do {
> > > +            test = (uintptr_t)dest | n;
> > > +            lsb = test & -test;
> > > +            switch (lsb) {
> > 
> > Either assert n != 0 to start, or use while not do/while.
> > Because the body of the loop won't handle n == 0 correctly.

I think n!=0 performs better than a loop. Worth checking
asm though.

> > > +            case 1:
> > > +                *(uint8_t *)dest = *(uint8_t *)src;
> > > +                src += 1;
> > > +                dest += 1;
> > > +                n -= 1;
> > > +                break;
> > > +            case 2:
> > > +                *(uint16_t *)dest = *(uint16_t *)src;
> > > +                src += 2;
> > > +                dest += 2;
> > > +                n -= 2;
> > > +                break;
> > > +            case 4:
> > > +                *(uint32_t *)dest = *(uint32_t *)src;
> > > +                src += 4;
> > > +                dest += 4;
> > > +                n -= 4;
> > > +                break;
> > > +            default:
> > > +                *(uint64_t *)dest = *(uint64_t *)src;
> > > +                src += 8;
> > > +                dest += 8;
> > > +                n -= 8;
> > 
> > Use qatomic_set for the stores.
> 
> 
> Won't this do things like locked on x86?

Hmm I didn't check. It's relaxed - so no locked. Fine by me.

> 
> 
> > 
> > src is not aligned, so except for case 1, you need ld{uw,l,q}_he_p.
> 
> These just map back to memcpy. What is the point?
> 
> > > +void qemu_ram_move(void *dest, const void *src, size_t n)
> > > +{
> > > +    if (HOST_UNALIGNED_MMIO_OK) {
> > > +        switch (n) {
> > > +        case 1:
> > > +            __builtin_memmove(dest, src, 1);
> > > +            break;
> > > +        case 2:
> > > +            __builtin_memmove(dest, src, 2);
> > > +            break;
> > > +        case 4:
> > > +            __builtin_memmove(dest, src, 4);
> > > +            break;
> > > +        case 8:
> > > +            __builtin_memmove(dest, src, 8);
> > > +            break;
> > > +        default:
> > > +            memmove(dest, src, n);
> > > +        }
> > > +    } else {
> > > +        qemu_ram_copy(dest, src, n);
> > > +    }
> > > +}
> > 
> > The qemu_ram_copy implementation above does not work with overlapping 
> > blocks.
> 
> IIUC copy is not supposed to. this is what move is for.
> 
> > 
> > r~


Reply via email to