[Mesa-dev] [PATCH 1/2] util: Add util_memcpy_cpu_to_le()
--- src/gallium/auxiliary/util/u_math.h | 22 ++ src/gallium/drivers/radeonsi/si_shader.c | 8 +--- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index b9ed197..cd3cf04 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -812,6 +812,28 @@ util_bswap16(uint16_t n) (n << 8); } +static INLINE void* +util_memcpy_cpu_to_le(void *dest, void *src, size_t n) +{ +#ifdef PIPE_ARCH_BIG_ENDIAN + size_t i, e; + for (i = 0, e = n % 8; i < e; i++) { + char *d = (char*)dest; + char *s = (char*)src; + d[i] = s[e - i - 1]; + } + dest += i; + n -= i; + for (i = 0, e = n / 8; i < e; i++) { + uint64_t *d = (uint64_t*)dest; + uint64_t *s = (uint64_t*)src; + d[i] = util_bswap64(s[e - i - 1]); + } + return dest; +#else + return memcpy(dest, src, n); +#endif +} /** * Clamp X to [MIN, MAX]. diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index f0650f4..6f0504b 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2559,13 +2559,7 @@ int si_compile_llvm(struct si_context *sctx, struct si_pipe_shader *shader, } ptr = (uint32_t*)sctx->b.ws->buffer_map(shader->bo->cs_buf, sctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE); - if (SI_BIG_ENDIAN) { - for (i = 0; i < binary.code_size / 4; ++i) { - ptr[i] = util_cpu_to_le32((*(uint32_t*)(binary.code + i*4))); - } - } else { - memcpy(ptr, binary.code, binary.code_size); - } + util_memcpy_cpu_to_le(ptr, binary.code, binary.code_size); sctx->b.ws->buffer_unmap(shader->bo->cs_buf); free(binary.code); -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] util: Add util_memcpy_cpu_to_le()
On Tue, Jul 15, 2014 at 11:19 AM, Tom Stellard wrote: > --- > src/gallium/auxiliary/util/u_math.h | 22 ++ > src/gallium/drivers/radeonsi/si_shader.c | 8 +--- > 2 files changed, 23 insertions(+), 7 deletions(-) > > diff --git a/src/gallium/auxiliary/util/u_math.h > b/src/gallium/auxiliary/util/u_math.h > index b9ed197..cd3cf04 100644 > --- a/src/gallium/auxiliary/util/u_math.h > +++ b/src/gallium/auxiliary/util/u_math.h > @@ -812,6 +812,28 @@ util_bswap16(uint16_t n) >(n << 8); > } > > +static INLINE void* > +util_memcpy_cpu_to_le(void *dest, void *src, size_t n) > +{ > +#ifdef PIPE_ARCH_BIG_ENDIAN > + size_t i, e; > + for (i = 0, e = n % 8; i < e; i++) { > + char *d = (char*)dest; > + char *s = (char*)src; > + d[i] = s[e - i - 1]; > + } > + dest += i; > + n -= i; > + for (i = 0, e = n / 8; i < e; i++) { > + uint64_t *d = (uint64_t*)dest; > + uint64_t *s = (uint64_t*)src; > + d[i] = util_bswap64(s[e - i - 1]); > + } > Doesn't this reverse all of the byte (as if it were a list) without preserving word boundaries? e.g. |a, b, c, d | e, f, g, h | i, j, k, l | m, n, o, p | -> |p, o, n, m | l, j, k, i | h, g, f, e | d, c, b, a | The old code did something like this, didn't it?: |a, b, c, d | e, f, g, h | i, j, k, l | m, n, o, p | -> |d, c, b, a | h, g, f, e | l, k, j, i | p, o, n, m | I don't know which is correct, but it does seem like a behavior change. Or am I misreading the code? + return dest; > +#else > + return memcpy(dest, src, n); > +#endif > +} > > /** > * Clamp X to [MIN, MAX]. > diff --git a/src/gallium/drivers/radeonsi/si_shader.c > b/src/gallium/drivers/radeonsi/si_shader.c > index f0650f4..6f0504b 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.c > +++ b/src/gallium/drivers/radeonsi/si_shader.c > @@ -2559,13 +2559,7 @@ int si_compile_llvm(struct si_context *sctx, struct > si_pipe_shader *shader, > } > > ptr = (uint32_t*)sctx->b.ws->buffer_map(shader->bo->cs_buf, > sctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE); > - if (SI_BIG_ENDIAN) { > - for (i = 0; i < binary.code_size / 4; ++i) { > - ptr[i] = > util_cpu_to_le32((*(uint32_t*)(binary.code + i*4))); > - } > - } else { > - memcpy(ptr, binary.code, binary.code_size); > - } > + util_memcpy_cpu_to_le(ptr, binary.code, binary.code_size); > sctx->b.ws->buffer_unmap(shader->bo->cs_buf); > > free(binary.code); > -- > 1.8.1.5 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] util: Add util_memcpy_cpu_to_le()
First of all, I think this should be two patches, one which adds the helper and one which makes radeonsi use it. On 16.07.2014 01:19, Tom Stellard wrote: > diff --git a/src/gallium/auxiliary/util/u_math.h > b/src/gallium/auxiliary/util/u_math.h > index b9ed197..cd3cf04 100644 > --- a/src/gallium/auxiliary/util/u_math.h > +++ b/src/gallium/auxiliary/util/u_math.h > @@ -812,6 +812,28 @@ util_bswap16(uint16_t n) >(n << 8); > } > > +static INLINE void* > +util_memcpy_cpu_to_le(void *dest, void *src, size_t n) This should indicate the size of the words being byte-swapped, e.g. util_memcpy_cpu_to_le32(). > +{ > +#ifdef PIPE_ARCH_BIG_ENDIAN > + size_t i, e; > + for (i = 0, e = n % 8; i < e; i++) { > + char *d = (char*)dest; > + char *s = (char*)src; > + d[i] = s[e - i - 1]; > + } > + dest += i; > + n -= i; This doesn't make sense. It's a caller error if n is not a multiple of the size of the words being being byte-swapped, so maybe just: assert((n % 3) == 0); > + for (i = 0, e = n / 8; i < e; i++) { > + uint64_t *d = (uint64_t*)dest; > + uint64_t *s = (uint64_t*)src; > + d[i] = util_bswap64(s[e - i - 1]); > + } This looks wrong as well. I think it should be: for (i = 0; i < n; i++) { uint32_t *d = (uint32_t*)dest; uint32_t *s = (uint32_t*)src; d[i] = util_bswap32(s[i]); } -- Earthling Michel Dänzer| http://www.amd.com Libre software enthusiast |Mesa and X developer ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev