Re: [Mesa-dev] [PATCH] Replace byte-swapping code with builtins in pack.c
Am 06.10.2017 um 11:57 schrieb Nicolai Hähnle: On 05.10.2017 20:59, Jochen Rollwagen wrote: Am 04.10.2017 um 05:59 schrieb Matt Turner: On Tue, Oct 3, 2017 at 11:01 AM, Jochen Rollwagenwrote: From 4cebe50a9bade6717923e104c954f3fad75f71bb Mon Sep 17 00:00:00 2001 From: Jochen Rollwagen Date: Tue, 3 Oct 2017 19:54:10 +0200 Subject: [PATCH] Replace byte-swapping code with builtins in pack.c This patch replaces some code for byte-swapping in pack.c with the builtin functions allowing the compiler to do its optimization magic --- src/mesa/main/pack.c | 22 ++ 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c index 94a6d28..9bfde39 100644 --- a/src/mesa/main/pack.c +++ b/src/mesa/main/pack.c @@ -230,26 +230,8 @@ _mesa_pack_bitmap( GLint width, GLint height, const GLubyte *source, } } - -#define SWAP2BYTE(VALUE)\ - {\ - GLubyte *bytes = (GLubyte *) &(VALUE);\ - GLubyte tmp = bytes[0];\ - bytes[0] = bytes[1];\ - bytes[1] = tmp;\ - } - -#define SWAP4BYTE(VALUE)\ - {\ - GLubyte *bytes = (GLubyte *) &(VALUE);\ - GLubyte tmp = bytes[0];\ - bytes[0] = bytes[3];\ - bytes[3] = tmp;\ - tmp = bytes[1];\ - bytes[1] = bytes[2];\ - bytes[2] = tmp;\ - } - +#define SWAP2BYTE(VALUE) __builtin_bswap16(VALUE) +#define SWAP4BYTE(VALUE) __builtin_bswap32(VALUE) In my experience it's much simpler to just write these as return ((x & 0xff) << 8) | ((x >> 8) & 0xff); and return ((x & 0xff) << 24) | ((x & 0xff00) << 8) | ((x & 0xff) >> 8) | ((x >> 24) & 0xff); and not have to deal with compiler intrinsics. Compilers will recognize these patterns and use the appropriate instructions (rol for 2-bytes and bswap for 4-bytes). You should be able to count the numbers of those instructions before and after such a patch to confirm it's working as expected. Fair enough. I've attached a new patch that optimizes the code on linux systems only where there is byteswap.h containing (hopefully optimal) functions. The other systems may be dealt with by a followup patch if desired. This doesn't really address Matt's point that compilers are good at pattern matching byte swaps already. Unless you can actually show with comparisons of the assembly and/or benchmarks that this results in better code, your patch makes the code base slightly worse because you've now added two different code paths where there was previously only one. Cheers, Nicolai I'm afraid you're overestimating the compiler's abilities to detect such a byte swapping pattern. The following c code #include static uint32_t SWAP4BYTE(uint32_t n) { return (n >> 24) | ((n >> 8) & 0xff00) | ((n << 8) & 0x00ff) | (n << 24); } static uint32_t builtin_SWAP4BYTE(uint32_t n) { return __builtin_bswap32(n); } main(){ SWAP4BYTE(321764); buitlin_SWAP4BYTE(321764); } compiles to the following assembler code on powerpc platforms with gcc --version gcc (Ubuntu 6.2.0-3ubuntu11~12.04) 6.2.0 20160901: .section".text" .align 2 .typeSWAP4BYTE, @function SWAP4BYTE: stwu 1,-32(1) stw 31,28(1) mr 31,1 stw 3,12(31) lwz 9,12(31) srwi 10,9,24 lwz 9,12(31) srwi 9,9,8 rlwinm 9,9,0,16,23 or 10,10,9 lwz 9,12(31) slwi 9,9,8 rlwinm 9,9,0,8,15 or 10,10,9 lwz 9,12(31) slwi 9,9,24 or 9,10,9 mr 3,9 addi 11,31,32 lwz 31,-4(11) mr 1,11 blr .sizeSWAP4BYTE,.-SWAP4BYTE .align 2 .typebuiltin_SWAP4BYTE, @function builtin_SWAP4BYTE: stwu 1,-32(1) stw 31,28(1) mr 31,1 stw 3,12(31) addi 10,31,12 lwbrx 9,0,10 mr 3,9 addi 11,31,32 lwz 31,-4(11) mr 1,11 blr .sizebuiltin_SWAP4BYTE,.-builtin_SWAP4BYTE .align 2 .globl main .typemain, @function The builtin function seems slightly more optimized. Cheers Jochen ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] Replace byte-swapping code with builtins in pack.c
On Sun, Oct 8, 2017 at 12:15 PM, Jochen Rollwagenwrote: > Am 06.10.2017 um 11:57 schrieb Nicolai Hähnle: >> >> On 05.10.2017 20:59, Jochen Rollwagen wrote: >>> >>> Am 04.10.2017 um 05:59 schrieb Matt Turner: On Tue, Oct 3, 2017 at 11:01 AM, Jochen Rollwagen wrote: > > From 4cebe50a9bade6717923e104c954f3fad75f71bb Mon Sep 17 00:00:00 2001 > From: Jochen Rollwagen > Date: Tue, 3 Oct 2017 19:54:10 +0200 > Subject: [PATCH] Replace byte-swapping code with builtins in pack.c > > This patch replaces some code for byte-swapping in pack.c with the > builtin > functions allowing the compiler to do its optimization magic > --- > src/mesa/main/pack.c | 22 ++ > 1 file changed, 2 insertions(+), 20 deletions(-) > > diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c > index 94a6d28..9bfde39 100644 > --- a/src/mesa/main/pack.c > +++ b/src/mesa/main/pack.c > @@ -230,26 +230,8 @@ _mesa_pack_bitmap( GLint width, GLint height, > const > GLubyte > *source, > } > } > > - > -#define SWAP2BYTE(VALUE)\ > - {\ > - GLubyte *bytes = (GLubyte *) &(VALUE);\ > - GLubyte tmp = bytes[0];\ > - bytes[0] = bytes[1];\ > - bytes[1] = tmp;\ > - } > - > -#define SWAP4BYTE(VALUE)\ > - {\ > - GLubyte *bytes = (GLubyte *) &(VALUE);\ > - GLubyte tmp = bytes[0];\ > - bytes[0] = bytes[3];\ > - bytes[3] = tmp;\ > - tmp = bytes[1];\ > - bytes[1] = bytes[2];\ > - bytes[2] = tmp;\ > - } > - > +#define SWAP2BYTE(VALUE) __builtin_bswap16(VALUE) > +#define SWAP4BYTE(VALUE) __builtin_bswap32(VALUE) In my experience it's much simpler to just write these as return ((x & 0xff) << 8) | ((x >> 8) & 0xff); and return ((x & 0xff) << 24) | ((x & 0xff00) << 8) | ((x & 0xff) >> 8) | ((x >> 24) & 0xff); and not have to deal with compiler intrinsics. Compilers will recognize these patterns and use the appropriate instructions (rol for 2-bytes and bswap for 4-bytes). You should be able to count the numbers of those instructions before and after such a patch to confirm it's working as expected. >>> >>> Fair enough. I've attached a new patch that optimizes the code on linux >>> systems only where there is byteswap.h containing (hopefully optimal) >>> functions. The other systems may be dealt with by a followup patch if >>> desired. >> >> >> This doesn't really address Matt's point that compilers are good at >> pattern matching byte swaps already. >> >> Unless you can actually show with comparisons of the assembly and/or >> benchmarks that this results in better code, your patch makes the code base >> slightly worse because you've now added two different code paths where there >> was previously only one. >> >> Cheers, >> Nicolai > > I'm afraid you're overestimating the compiler's abilities to detect such a > byte swapping pattern. > > The following c code > > #include > > static uint32_t > SWAP4BYTE(uint32_t n) > { >return (n >> 24) | > ((n >> 8) & 0xff00) | > ((n << 8) & 0x00ff) | > (n << 24); > } > > static uint32_t > builtin_SWAP4BYTE(uint32_t n) > { >return __builtin_bswap32(n); > } > > main(){ > SWAP4BYTE(321764); > buitlin_SWAP4BYTE(321764); > } > > compiles to the following assembler code on powerpc platforms with gcc > --version gcc (Ubuntu 6.2.0-3ubuntu11~12.04) 6.2.0 20160901: > > .section".text" > .align 2 > .typeSWAP4BYTE, @function > SWAP4BYTE: > stwu 1,-32(1) > stw 31,28(1) > mr 31,1 > stw 3,12(31) > lwz 9,12(31) > srwi 10,9,24 > lwz 9,12(31) > srwi 9,9,8 > rlwinm 9,9,0,16,23 > or 10,10,9 > lwz 9,12(31) > slwi 9,9,8 > rlwinm 9,9,0,8,15 > or 10,10,9 > lwz 9,12(31) > slwi 9,9,24 > or 9,10,9 > mr 3,9 > addi 11,31,32 > lwz 31,-4(11) > mr 1,11 > blr > .sizeSWAP4BYTE,.-SWAP4BYTE > .align 2 > .typebuiltin_SWAP4BYTE, @function > builtin_SWAP4BYTE: > stwu 1,-32(1) > stw 31,28(1) > mr 31,1 > stw 3,12(31) > addi 10,31,12 > lwbrx 9,0,10 > mr 3,9 > addi 11,31,32 > lwz 31,-4(11) > mr 1,11 > blr > .sizebuiltin_SWAP4BYTE,.-builtin_SWAP4BYTE > .align 2 > .globl main > .typemain, @function > > The builtin function seems slightly more optimized. Compile with optimization, like a real build. With -O2 it produces
Re: [Mesa-dev] [PATCH] Replace byte-swapping code with builtins in pack.c
Quoting Erik Faye-Lund (2017-10-06 00:31:20) > On Thu, Oct 5, 2017 at 8:59 PM, Jochen Rollwagen> wrote: > > Am 04.10.2017 um 05:59 schrieb Matt Turner: > >> > >> On Tue, Oct 3, 2017 at 11:01 AM, Jochen Rollwagen > >> wrote: > >>> > >>> From 4cebe50a9bade6717923e104c954f3fad75f71bb Mon Sep 17 00:00:00 2001 > >>> From: Jochen Rollwagen > >>> Date: Tue, 3 Oct 2017 19:54:10 +0200 > >>> Subject: [PATCH] Replace byte-swapping code with builtins in pack.c > >>> > >>> This patch replaces some code for byte-swapping in pack.c with the > >>> builtin > >>> functions allowing the compiler to do its optimization magic > >>> --- > >>> src/mesa/main/pack.c | 22 ++ > >>> 1 file changed, 2 insertions(+), 20 deletions(-) > >>> > >>> diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c > >>> index 94a6d28..9bfde39 100644 > >>> --- a/src/mesa/main/pack.c > >>> +++ b/src/mesa/main/pack.c > >>> @@ -230,26 +230,8 @@ _mesa_pack_bitmap( GLint width, GLint height, const > >>> GLubyte > >>> *source, > >>> } > >>> } > >>> > >>> - > >>> -#define SWAP2BYTE(VALUE)\ > >>> - {\ > >>> - GLubyte *bytes = (GLubyte *) &(VALUE);\ > >>> - GLubyte tmp = bytes[0];\ > >>> - bytes[0] = bytes[1];\ > >>> - bytes[1] = tmp;\ > >>> - } > >>> - > >>> -#define SWAP4BYTE(VALUE)\ > >>> - {\ > >>> - GLubyte *bytes = (GLubyte *) &(VALUE);\ > >>> - GLubyte tmp = bytes[0];\ > >>> - bytes[0] = bytes[3];\ > >>> - bytes[3] = tmp;\ > >>> - tmp = bytes[1];\ > >>> - bytes[1] = bytes[2];\ > >>> - bytes[2] = tmp;\ > >>> - } > >>> - > >>> +#define SWAP2BYTE(VALUE) __builtin_bswap16(VALUE) > >>> +#define SWAP4BYTE(VALUE) __builtin_bswap32(VALUE) > >> > >> In my experience it's much simpler to just write these as > >> > >> return ((x & 0xff) << 8) | ((x >> 8) & 0xff); > >> > >> and > >> > >> return ((x & 0xff) << 24) | > >> ((x & 0xff00) << 8) | > >> ((x & 0xff) >> 8) | > >> ((x >> 24) & 0xff); > >> > >> and not have to deal with compiler intrinsics. Compilers will > >> recognize these patterns and use the appropriate instructions (rol for > >> 2-bytes and bswap for 4-bytes). > >> > >> You should be able to count the numbers of those instructions before > >> and after such a patch to confirm it's working as expected. > > > > Fair enough. I've attached a new patch that optimizes the code on linux > > systems only where there is byteswap.h containing (hopefully optimal) > > functions. The other systems may be dealt with by a followup patch if > > desired. > > > > From 327012410f75f77010b658ce9643a229c45bc308 Mon Sep 17 00:00:00 2001 > > From: Jochen Rollwagen > > Date: Thu, 5 Oct 2017 20:48:46 +0200 > > Subject: [PATCH] Simplify byte swapping code in pack.c on Linux systems > > > > This patch replaces the code for byte swapping in pack.c with the function > > from > > byteswap.h on Linux systems > > --- > > src/mesa/main/pack.c |8 +++- > > 1 file changed, 7 insertions(+), 1 deletion(-) > > > > diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c > > index 94a6d28..d8ab267 100644 > > --- a/src/mesa/main/pack.c > > +++ b/src/mesa/main/pack.c > > @@ -230,6 +230,12 @@ _mesa_pack_bitmap( GLint width, GLint height, const > > GLubyte > > *source, > > } > > } > > > > +#ifdef __linux__ > > +#include > > + > > +#define SWAP2BYTE(VALUE) bswap_16(VALUE) > > +#define SWAP4BYTE(VALUE) bswap_32(VALUE) > > +#else > > Another alternative would be to use: > > AX_GCC_BUILTIN / HAVE___BUILTIN_BSWAP{16,32} > > ...to check explicitly for the builtins, like we do for other > builtins. We already do this for __builtin_bswap32 and > __builtin_bswap64, so it would seem like a pretty straight-forward > extension of that pattern. Not withstanding Matt and Nicolai's points: We already check for bswap builtins in configure (and meson), and would be the right way to guard this since this isn't really linux specific, it's gcc/clang specific, and I *think* the BSD's would benefit as well. Dylan signature.asc Description: signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] Replace byte-swapping code with builtins in pack.c
On 05.10.2017 20:59, Jochen Rollwagen wrote: Am 04.10.2017 um 05:59 schrieb Matt Turner: On Tue, Oct 3, 2017 at 11:01 AM, Jochen Rollwagenwrote: From 4cebe50a9bade6717923e104c954f3fad75f71bb Mon Sep 17 00:00:00 2001 From: Jochen Rollwagen Date: Tue, 3 Oct 2017 19:54:10 +0200 Subject: [PATCH] Replace byte-swapping code with builtins in pack.c This patch replaces some code for byte-swapping in pack.c with the builtin functions allowing the compiler to do its optimization magic --- src/mesa/main/pack.c | 22 ++ 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c index 94a6d28..9bfde39 100644 --- a/src/mesa/main/pack.c +++ b/src/mesa/main/pack.c @@ -230,26 +230,8 @@ _mesa_pack_bitmap( GLint width, GLint height, const GLubyte *source, } } - -#define SWAP2BYTE(VALUE) \ - { \ - GLubyte *bytes = (GLubyte *) &(VALUE); \ - GLubyte tmp = bytes[0]; \ - bytes[0] = bytes[1]; \ - bytes[1] = tmp; \ - } - -#define SWAP4BYTE(VALUE) \ - { \ - GLubyte *bytes = (GLubyte *) &(VALUE); \ - GLubyte tmp = bytes[0]; \ - bytes[0] = bytes[3]; \ - bytes[3] = tmp; \ - tmp = bytes[1]; \ - bytes[1] = bytes[2]; \ - bytes[2] = tmp; \ - } - +#define SWAP2BYTE(VALUE) __builtin_bswap16(VALUE) +#define SWAP4BYTE(VALUE) __builtin_bswap32(VALUE) In my experience it's much simpler to just write these as return ((x & 0xff) << 8) | ((x >> 8) & 0xff); and return ((x & 0xff) << 24) | ((x & 0xff00) << 8) | ((x & 0xff) >> 8) | ((x >> 24) & 0xff); and not have to deal with compiler intrinsics. Compilers will recognize these patterns and use the appropriate instructions (rol for 2-bytes and bswap for 4-bytes). You should be able to count the numbers of those instructions before and after such a patch to confirm it's working as expected. Fair enough. I've attached a new patch that optimizes the code on linux systems only where there is byteswap.h containing (hopefully optimal) functions. The other systems may be dealt with by a followup patch if desired. This doesn't really address Matt's point that compilers are good at pattern matching byte swaps already. Unless you can actually show with comparisons of the assembly and/or benchmarks that this results in better code, your patch makes the code base slightly worse because you've now added two different code paths where there was previously only one. Cheers, Nicolai From 327012410f75f77010b658ce9643a229c45bc308 Mon Sep 17 00:00:00 2001 From: Jochen Rollwagen Date: Thu, 5 Oct 2017 20:48:46 +0200 Subject: [PATCH] Simplify byte swapping code in pack.c on Linux systems This patch replaces the code for byte swapping in pack.c with the function from byteswap.h on Linux systems --- src/mesa/main/pack.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c index 94a6d28..d8ab267 100644 --- a/src/mesa/main/pack.c +++ b/src/mesa/main/pack.c @@ -230,6 +230,12 @@ _mesa_pack_bitmap( GLint width, GLint height, const GLubyte *source, } } +#ifdef __linux__ +#include + +#define SWAP2BYTE(VALUE) bswap_16(VALUE) +#define SWAP4BYTE(VALUE) bswap_32(VALUE) +#else #define SWAP2BYTE(VALUE) \ { \ @@ -249,7 +255,7 @@ _mesa_pack_bitmap( GLint width, GLint height, const GLubyte *source, bytes[1] = bytes[2]; \ bytes[2] = tmp; \ } - +#endif static void extract_uint_indexes(GLuint n, GLuint indexes[], ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev -- Lerne, wie die Welt wirklich ist, Aber vergiss niemals, wie sie sein sollte. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] Replace byte-swapping code with builtins in pack.c
On Thu, Oct 5, 2017 at 8:59 PM, Jochen Rollwagenwrote: > Am 04.10.2017 um 05:59 schrieb Matt Turner: >> >> On Tue, Oct 3, 2017 at 11:01 AM, Jochen Rollwagen >> wrote: >>> >>> From 4cebe50a9bade6717923e104c954f3fad75f71bb Mon Sep 17 00:00:00 2001 >>> From: Jochen Rollwagen >>> Date: Tue, 3 Oct 2017 19:54:10 +0200 >>> Subject: [PATCH] Replace byte-swapping code with builtins in pack.c >>> >>> This patch replaces some code for byte-swapping in pack.c with the >>> builtin >>> functions allowing the compiler to do its optimization magic >>> --- >>> src/mesa/main/pack.c | 22 ++ >>> 1 file changed, 2 insertions(+), 20 deletions(-) >>> >>> diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c >>> index 94a6d28..9bfde39 100644 >>> --- a/src/mesa/main/pack.c >>> +++ b/src/mesa/main/pack.c >>> @@ -230,26 +230,8 @@ _mesa_pack_bitmap( GLint width, GLint height, const >>> GLubyte >>> *source, >>> } >>> } >>> >>> - >>> -#define SWAP2BYTE(VALUE)\ >>> - {\ >>> - GLubyte *bytes = (GLubyte *) &(VALUE);\ >>> - GLubyte tmp = bytes[0];\ >>> - bytes[0] = bytes[1];\ >>> - bytes[1] = tmp;\ >>> - } >>> - >>> -#define SWAP4BYTE(VALUE)\ >>> - {\ >>> - GLubyte *bytes = (GLubyte *) &(VALUE);\ >>> - GLubyte tmp = bytes[0];\ >>> - bytes[0] = bytes[3];\ >>> - bytes[3] = tmp;\ >>> - tmp = bytes[1];\ >>> - bytes[1] = bytes[2];\ >>> - bytes[2] = tmp;\ >>> - } >>> - >>> +#define SWAP2BYTE(VALUE) __builtin_bswap16(VALUE) >>> +#define SWAP4BYTE(VALUE) __builtin_bswap32(VALUE) >> >> In my experience it's much simpler to just write these as >> >> return ((x & 0xff) << 8) | ((x >> 8) & 0xff); >> >> and >> >> return ((x & 0xff) << 24) | >> ((x & 0xff00) << 8) | >> ((x & 0xff) >> 8) | >> ((x >> 24) & 0xff); >> >> and not have to deal with compiler intrinsics. Compilers will >> recognize these patterns and use the appropriate instructions (rol for >> 2-bytes and bswap for 4-bytes). >> >> You should be able to count the numbers of those instructions before >> and after such a patch to confirm it's working as expected. > > Fair enough. I've attached a new patch that optimizes the code on linux > systems only where there is byteswap.h containing (hopefully optimal) > functions. The other systems may be dealt with by a followup patch if > desired. > > From 327012410f75f77010b658ce9643a229c45bc308 Mon Sep 17 00:00:00 2001 > From: Jochen Rollwagen > Date: Thu, 5 Oct 2017 20:48:46 +0200 > Subject: [PATCH] Simplify byte swapping code in pack.c on Linux systems > > This patch replaces the code for byte swapping in pack.c with the function > from > byteswap.h on Linux systems > --- > src/mesa/main/pack.c |8 +++- > 1 file changed, 7 insertions(+), 1 deletion(-) > > diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c > index 94a6d28..d8ab267 100644 > --- a/src/mesa/main/pack.c > +++ b/src/mesa/main/pack.c > @@ -230,6 +230,12 @@ _mesa_pack_bitmap( GLint width, GLint height, const > GLubyte > *source, > } > } > > +#ifdef __linux__ > +#include > + > +#define SWAP2BYTE(VALUE) bswap_16(VALUE) > +#define SWAP4BYTE(VALUE) bswap_32(VALUE) > +#else Another alternative would be to use: AX_GCC_BUILTIN / HAVE___BUILTIN_BSWAP{16,32} ...to check explicitly for the builtins, like we do for other builtins. We already do this for __builtin_bswap32 and __builtin_bswap64, so it would seem like a pretty straight-forward extension of that pattern. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] Replace byte-swapping code with builtins in pack.c
Am 04.10.2017 um 05:59 schrieb Matt Turner: On Tue, Oct 3, 2017 at 11:01 AM, Jochen Rollwagenwrote: From 4cebe50a9bade6717923e104c954f3fad75f71bb Mon Sep 17 00:00:00 2001 From: Jochen Rollwagen Date: Tue, 3 Oct 2017 19:54:10 +0200 Subject: [PATCH] Replace byte-swapping code with builtins in pack.c This patch replaces some code for byte-swapping in pack.c with the builtin functions allowing the compiler to do its optimization magic --- src/mesa/main/pack.c | 22 ++ 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c index 94a6d28..9bfde39 100644 --- a/src/mesa/main/pack.c +++ b/src/mesa/main/pack.c @@ -230,26 +230,8 @@ _mesa_pack_bitmap( GLint width, GLint height, const GLubyte *source, } } - -#define SWAP2BYTE(VALUE)\ - {\ - GLubyte *bytes = (GLubyte *) &(VALUE);\ - GLubyte tmp = bytes[0];\ - bytes[0] = bytes[1];\ - bytes[1] = tmp;\ - } - -#define SWAP4BYTE(VALUE)\ - {\ - GLubyte *bytes = (GLubyte *) &(VALUE);\ - GLubyte tmp = bytes[0];\ - bytes[0] = bytes[3];\ - bytes[3] = tmp;\ - tmp = bytes[1];\ - bytes[1] = bytes[2];\ - bytes[2] = tmp;\ - } - +#define SWAP2BYTE(VALUE) __builtin_bswap16(VALUE) +#define SWAP4BYTE(VALUE) __builtin_bswap32(VALUE) In my experience it's much simpler to just write these as return ((x & 0xff) << 8) | ((x >> 8) & 0xff); and return ((x & 0xff) << 24) | ((x & 0xff00) << 8) | ((x & 0xff) >> 8) | ((x >> 24) & 0xff); and not have to deal with compiler intrinsics. Compilers will recognize these patterns and use the appropriate instructions (rol for 2-bytes and bswap for 4-bytes). You should be able to count the numbers of those instructions before and after such a patch to confirm it's working as expected. Fair enough. I've attached a new patch that optimizes the code on linux systems only where there is byteswap.h containing (hopefully optimal) functions. The other systems may be dealt with by a followup patch if desired. From 327012410f75f77010b658ce9643a229c45bc308 Mon Sep 17 00:00:00 2001 From: Jochen Rollwagen Date: Thu, 5 Oct 2017 20:48:46 +0200 Subject: [PATCH] Simplify byte swapping code in pack.c on Linux systems This patch replaces the code for byte swapping in pack.c with the function from byteswap.h on Linux systems --- src/mesa/main/pack.c |8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c index 94a6d28..d8ab267 100644 --- a/src/mesa/main/pack.c +++ b/src/mesa/main/pack.c @@ -230,6 +230,12 @@ _mesa_pack_bitmap( GLint width, GLint height, const GLubyte *source, } } +#ifdef __linux__ +#include + +#define SWAP2BYTE(VALUE) bswap_16(VALUE) +#define SWAP4BYTE(VALUE) bswap_32(VALUE) +#else #define SWAP2BYTE(VALUE)\ {\ @@ -249,7 +255,7 @@ _mesa_pack_bitmap( GLint width, GLint height, const GLubyte *source, bytes[1] = bytes[2];\ bytes[2] = tmp;\ } - +#endif static void extract_uint_indexes(GLuint n, GLuint indexes[], -- 1.7.9.5 Cheers Jochen >From 327012410f75f77010b658ce9643a229c45bc308 Mon Sep 17 00:00:00 2001 From: Jochen Rollwagen Date: Thu, 5 Oct 2017 20:48:46 +0200 Subject: [PATCH] Simplify byte swapping code in pack.c on Linux systems This patch replaces the code for byte swapping in pack.c with the function from byteswap.h on Linux systems --- src/mesa/main/pack.c |8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c index 94a6d28..d8ab267 100644 --- a/src/mesa/main/pack.c +++ b/src/mesa/main/pack.c @@ -230,6 +230,12 @@ _mesa_pack_bitmap( GLint width, GLint height, const GLubyte *source, } } +#ifdef __linux__ +#include + +#define SWAP2BYTE(VALUE) bswap_16(VALUE) +#define SWAP4BYTE(VALUE) bswap_32(VALUE) +#else #define SWAP2BYTE(VALUE) \ { \ @@ -249,7 +255,7 @@ _mesa_pack_bitmap( GLint width, GLint height, const GLubyte *source, bytes[1] = bytes[2]; \ bytes[2] = tmp;\ } - +#endif static void extract_uint_indexes(GLuint n, GLuint indexes[], -- 1.7.9.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] Replace byte-swapping code with builtins in pack.c
On Tue, Oct 3, 2017 at 11:01 AM, Jochen Rollwagenwrote: > From 4cebe50a9bade6717923e104c954f3fad75f71bb Mon Sep 17 00:00:00 2001 > From: Jochen Rollwagen > Date: Tue, 3 Oct 2017 19:54:10 +0200 > Subject: [PATCH] Replace byte-swapping code with builtins in pack.c > > This patch replaces some code for byte-swapping in pack.c with the builtin > functions allowing the compiler to do its optimization magic > --- > src/mesa/main/pack.c | 22 ++ > 1 file changed, 2 insertions(+), 20 deletions(-) > > diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c > index 94a6d28..9bfde39 100644 > --- a/src/mesa/main/pack.c > +++ b/src/mesa/main/pack.c > @@ -230,26 +230,8 @@ _mesa_pack_bitmap( GLint width, GLint height, const > GLubyte > *source, > } > } > > - > -#define SWAP2BYTE(VALUE)\ > - {\ > - GLubyte *bytes = (GLubyte *) &(VALUE);\ > - GLubyte tmp = bytes[0];\ > - bytes[0] = bytes[1];\ > - bytes[1] = tmp;\ > - } > - > -#define SWAP4BYTE(VALUE)\ > - {\ > - GLubyte *bytes = (GLubyte *) &(VALUE);\ > - GLubyte tmp = bytes[0];\ > - bytes[0] = bytes[3];\ > - bytes[3] = tmp;\ > - tmp = bytes[1];\ > - bytes[1] = bytes[2];\ > - bytes[2] = tmp;\ > - } > - > +#define SWAP2BYTE(VALUE) __builtin_bswap16(VALUE) > +#define SWAP4BYTE(VALUE) __builtin_bswap32(VALUE) In my experience it's much simpler to just write these as return ((x & 0xff) << 8) | ((x >> 8) & 0xff); and return ((x & 0xff) << 24) | ((x & 0xff00) << 8) | ((x & 0xff) >> 8) | ((x >> 24) & 0xff); and not have to deal with compiler intrinsics. Compilers will recognize these patterns and use the appropriate instructions (rol for 2-bytes and bswap for 4-bytes). You should be able to count the numbers of those instructions before and after such a patch to confirm it's working as expected. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] Replace byte-swapping code with builtins in pack.c
On 10/03/2017 12:01 PM, Jochen Rollwagen wrote: From 4cebe50a9bade6717923e104c954f3fad75f71bb Mon Sep 17 00:00:00 2001 From: Jochen RollwagenDate: Tue, 3 Oct 2017 19:54:10 +0200 Subject: [PATCH] Replace byte-swapping code with builtins in pack.c This patch replaces some code for byte-swapping in pack.c with the builtin functions allowing the compiler to do its optimization magic --- src/mesa/main/pack.c | 22 ++ 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c index 94a6d28..9bfde39 100644 --- a/src/mesa/main/pack.c +++ b/src/mesa/main/pack.c @@ -230,26 +230,8 @@ _mesa_pack_bitmap( GLint width, GLint height, const GLubyte *source, } } - -#define SWAP2BYTE(VALUE)\ - {\ - GLubyte *bytes = (GLubyte *) &(VALUE);\ - GLubyte tmp = bytes[0];\ - bytes[0] = bytes[1];\ - bytes[1] = tmp;\ - } - -#define SWAP4BYTE(VALUE)\ - {\ - GLubyte *bytes = (GLubyte *) &(VALUE);\ - GLubyte tmp = bytes[0];\ - bytes[0] = bytes[3];\ - bytes[3] = tmp;\ - tmp = bytes[1];\ - bytes[1] = bytes[2];\ - bytes[2] = tmp;\ - } - +#define SWAP2BYTE(VALUE) __builtin_bswap16(VALUE) +#define SWAP4BYTE(VALUE) __builtin_bswap32(VALUE) Looks like a gcc feature. We also need to build with other compilers like Microsoft's. -Brian static void extract_uint_indexes(GLuint n, GLuint indexes[], ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] Replace byte-swapping code with builtins in pack.c
From 4cebe50a9bade6717923e104c954f3fad75f71bb Mon Sep 17 00:00:00 2001 From: Jochen RollwagenDate: Tue, 3 Oct 2017 19:54:10 +0200 Subject: [PATCH] Replace byte-swapping code with builtins in pack.c This patch replaces some code for byte-swapping in pack.c with the builtin functions allowing the compiler to do its optimization magic --- src/mesa/main/pack.c | 22 ++ 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c index 94a6d28..9bfde39 100644 --- a/src/mesa/main/pack.c +++ b/src/mesa/main/pack.c @@ -230,26 +230,8 @@ _mesa_pack_bitmap( GLint width, GLint height, const GLubyte *source, } } - -#define SWAP2BYTE(VALUE)\ - {\ - GLubyte *bytes = (GLubyte *) &(VALUE);\ - GLubyte tmp = bytes[0];\ - bytes[0] = bytes[1];\ - bytes[1] = tmp;\ - } - -#define SWAP4BYTE(VALUE)\ - {\ - GLubyte *bytes = (GLubyte *) &(VALUE);\ - GLubyte tmp = bytes[0];\ - bytes[0] = bytes[3];\ - bytes[3] = tmp;\ - tmp = bytes[1];\ - bytes[1] = bytes[2];\ - bytes[2] = tmp;\ - } - +#define SWAP2BYTE(VALUE) __builtin_bswap16(VALUE) +#define SWAP4BYTE(VALUE) __builtin_bswap32(VALUE) static void extract_uint_indexes(GLuint n, GLuint indexes[], -- 1.7.9.5 >From 4cebe50a9bade6717923e104c954f3fad75f71bb Mon Sep 17 00:00:00 2001 From: Jochen Rollwagen Date: Tue, 3 Oct 2017 19:54:10 +0200 Subject: [PATCH] Replace byte-swapping code with builtins in pack.c This patch replaces some code for byte-swapping in pack.c with the builtin functions allowing the compiler to do its optimization magic --- src/mesa/main/pack.c | 22 ++ 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c index 94a6d28..9bfde39 100644 --- a/src/mesa/main/pack.c +++ b/src/mesa/main/pack.c @@ -230,26 +230,8 @@ _mesa_pack_bitmap( GLint width, GLint height, const GLubyte *source, } } - -#define SWAP2BYTE(VALUE) \ - { \ - GLubyte *bytes = (GLubyte *) &(VALUE); \ - GLubyte tmp = bytes[0]; \ - bytes[0] = bytes[1]; \ - bytes[1] = tmp;\ - } - -#define SWAP4BYTE(VALUE) \ - { \ - GLubyte *bytes = (GLubyte *) &(VALUE); \ - GLubyte tmp = bytes[0]; \ - bytes[0] = bytes[3]; \ - bytes[3] = tmp;\ - tmp = bytes[1];\ - bytes[1] = bytes[2]; \ - bytes[2] = tmp;\ - } - +#define SWAP2BYTE(VALUE) __builtin_bswap16(VALUE) +#define SWAP4BYTE(VALUE) __builtin_bswap32(VALUE) static void extract_uint_indexes(GLuint n, GLuint indexes[], -- 1.7.9.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev