Re: [Mesa-dev] [PATCH] Replace byte-swapping code with builtins in pack.c

2017-10-09 Thread Jochen Rollwagen

Am 06.10.2017 um 11:57 schrieb Nicolai Hähnle:

On 05.10.2017 20:59, Jochen Rollwagen wrote:

Am 04.10.2017 um 05:59 schrieb Matt Turner:
On Tue, Oct 3, 2017 at 11:01 AM, Jochen Rollwagen 
 wrote:
 From 4cebe50a9bade6717923e104c954f3fad75f71bb Mon Sep 17 00:00:00 
2001

From: Jochen Rollwagen 
Date: Tue, 3 Oct 2017 19:54:10 +0200
Subject: [PATCH] Replace byte-swapping code with builtins in pack.c

This patch replaces some code for byte-swapping in pack.c with the 
builtin

functions allowing the compiler to do its optimization magic
---
  src/mesa/main/pack.c |   22 ++
  1 file changed, 2 insertions(+), 20 deletions(-)

diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c
index 94a6d28..9bfde39 100644
--- a/src/mesa/main/pack.c
+++ b/src/mesa/main/pack.c
@@ -230,26 +230,8 @@ _mesa_pack_bitmap( GLint width, GLint height, 
const

GLubyte
  *source,
 }
  }

-
-#define SWAP2BYTE(VALUE)\
-   {\
-  GLubyte *bytes = (GLubyte *) &(VALUE);\
-  GLubyte tmp = bytes[0];\
-  bytes[0] = bytes[1];\
-  bytes[1] = tmp;\
-   }
-
-#define SWAP4BYTE(VALUE)\
-   {\
-  GLubyte *bytes = (GLubyte *) &(VALUE);\
-  GLubyte tmp = bytes[0];\
-  bytes[0] = bytes[3];\
-  bytes[3] = tmp;\
-  tmp = bytes[1];\
-  bytes[1] = bytes[2];\
-  bytes[2] = tmp;\
-   }
-
+#define SWAP2BYTE(VALUE) __builtin_bswap16(VALUE)
+#define SWAP4BYTE(VALUE) __builtin_bswap32(VALUE)

In my experience it's much simpler to just write these as

return ((x & 0xff) << 8) | ((x >> 8) & 0xff);

and

return  ((x & 0xff) << 24) |
((x & 0xff00) << 8) |
((x & 0xff) >> 8) |
((x >> 24) & 0xff);

and not have to deal with compiler intrinsics. Compilers will
recognize these patterns and use the appropriate instructions (rol for
2-bytes and bswap for 4-bytes).

You should be able to count the numbers of those instructions before
and after such a patch to confirm it's working as expected.
Fair enough. I've attached a new patch that optimizes the code on 
linux systems only where there is byteswap.h containing (hopefully 
optimal) functions. The other systems may be dealt with by a followup 
patch if desired.


This doesn't really address Matt's point that compilers are good at 
pattern matching byte swaps already.


Unless you can actually show with comparisons of the assembly and/or 
benchmarks that this results in better code, your patch makes the code 
base slightly worse because you've now added two different code paths 
where there was previously only one.


Cheers,
Nicolai
I'm afraid you're overestimating the compiler's abilities to detect such 
a byte swapping pattern.


The following c code

#include 

static uint32_t
SWAP4BYTE(uint32_t n)
{
   return (n >> 24) |
  ((n >> 8) & 0xff00) |
  ((n << 8) & 0x00ff) |
  (n << 24);
}

static uint32_t
builtin_SWAP4BYTE(uint32_t n)
{
   return __builtin_bswap32(n);
}

main(){
SWAP4BYTE(321764);
buitlin_SWAP4BYTE(321764);
}

compiles to the following assembler code on powerpc platforms with gcc 
--version gcc (Ubuntu 6.2.0-3ubuntu11~12.04) 6.2.0 20160901:


.section".text"
.align 2
.typeSWAP4BYTE, @function
SWAP4BYTE:
stwu 1,-32(1)
stw 31,28(1)
mr 31,1
stw 3,12(31)
lwz 9,12(31)
srwi 10,9,24
lwz 9,12(31)
srwi 9,9,8
rlwinm 9,9,0,16,23
or 10,10,9
lwz 9,12(31)
slwi 9,9,8
rlwinm 9,9,0,8,15
or 10,10,9
lwz 9,12(31)
slwi 9,9,24
or 9,10,9
mr 3,9
addi 11,31,32
lwz 31,-4(11)
mr 1,11
blr
.sizeSWAP4BYTE,.-SWAP4BYTE
.align 2
.typebuiltin_SWAP4BYTE, @function
builtin_SWAP4BYTE:
stwu 1,-32(1)
stw 31,28(1)
mr 31,1
stw 3,12(31)
addi 10,31,12
lwbrx 9,0,10
mr 3,9
addi 11,31,32
lwz 31,-4(11)
mr 1,11
blr
.sizebuiltin_SWAP4BYTE,.-builtin_SWAP4BYTE
.align 2
.globl main
.typemain, @function

The builtin function seems slightly more optimized.

Cheers

Jochen

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Replace byte-swapping code with builtins in pack.c

2017-10-08 Thread Matt Turner
On Sun, Oct 8, 2017 at 12:15 PM, Jochen Rollwagen  wrote:
> Am 06.10.2017 um 11:57 schrieb Nicolai Hähnle:
>>
>> On 05.10.2017 20:59, Jochen Rollwagen wrote:
>>>
>>> Am 04.10.2017 um 05:59 schrieb Matt Turner:

 On Tue, Oct 3, 2017 at 11:01 AM, Jochen Rollwagen
  wrote:
>
>  From 4cebe50a9bade6717923e104c954f3fad75f71bb Mon Sep 17 00:00:00 2001
> From: Jochen Rollwagen 
> Date: Tue, 3 Oct 2017 19:54:10 +0200
> Subject: [PATCH] Replace byte-swapping code with builtins in pack.c
>
> This patch replaces some code for byte-swapping in pack.c with the
> builtin
> functions allowing the compiler to do its optimization magic
> ---
>   src/mesa/main/pack.c |   22 ++
>   1 file changed, 2 insertions(+), 20 deletions(-)
>
> diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c
> index 94a6d28..9bfde39 100644
> --- a/src/mesa/main/pack.c
> +++ b/src/mesa/main/pack.c
> @@ -230,26 +230,8 @@ _mesa_pack_bitmap( GLint width, GLint height,
> const
> GLubyte
>   *source,
>  }
>   }
>
> -
> -#define SWAP2BYTE(VALUE)\
> -   {\
> -  GLubyte *bytes = (GLubyte *) &(VALUE);\
> -  GLubyte tmp = bytes[0];\
> -  bytes[0] = bytes[1];\
> -  bytes[1] = tmp;\
> -   }
> -
> -#define SWAP4BYTE(VALUE)\
> -   {\
> -  GLubyte *bytes = (GLubyte *) &(VALUE);\
> -  GLubyte tmp = bytes[0];\
> -  bytes[0] = bytes[3];\
> -  bytes[3] = tmp;\
> -  tmp = bytes[1];\
> -  bytes[1] = bytes[2];\
> -  bytes[2] = tmp;\
> -   }
> -
> +#define SWAP2BYTE(VALUE) __builtin_bswap16(VALUE)
> +#define SWAP4BYTE(VALUE) __builtin_bswap32(VALUE)

 In my experience it's much simpler to just write these as

 return ((x & 0xff) << 8) | ((x >> 8) & 0xff);

 and

 return  ((x & 0xff) << 24) |
 ((x & 0xff00) << 8) |
 ((x & 0xff) >> 8) |
 ((x >> 24) & 0xff);

 and not have to deal with compiler intrinsics. Compilers will
 recognize these patterns and use the appropriate instructions (rol for
 2-bytes and bswap for 4-bytes).

 You should be able to count the numbers of those instructions before
 and after such a patch to confirm it's working as expected.
>>>
>>> Fair enough. I've attached a new patch that optimizes the code on linux
>>> systems only where there is byteswap.h containing (hopefully optimal)
>>> functions. The other systems may be dealt with by a followup patch if
>>> desired.
>>
>>
>> This doesn't really address Matt's point that compilers are good at
>> pattern matching byte swaps already.
>>
>> Unless you can actually show with comparisons of the assembly and/or
>> benchmarks that this results in better code, your patch makes the code base
>> slightly worse because you've now added two different code paths where there
>> was previously only one.
>>
>> Cheers,
>> Nicolai
>
> I'm afraid you're overestimating the compiler's abilities to detect such a
> byte swapping pattern.
>
> The following c code
>
> #include 
>
> static uint32_t
> SWAP4BYTE(uint32_t n)
> {
>return (n >> 24) |
>   ((n >> 8) & 0xff00) |
>   ((n << 8) & 0x00ff) |
>   (n << 24);
> }
>
> static uint32_t
> builtin_SWAP4BYTE(uint32_t n)
> {
>return __builtin_bswap32(n);
> }
>
> main(){
> SWAP4BYTE(321764);
> buitlin_SWAP4BYTE(321764);
> }
>
> compiles to the following assembler code on powerpc platforms with gcc
> --version gcc (Ubuntu 6.2.0-3ubuntu11~12.04) 6.2.0 20160901:
>
> .section".text"
> .align 2
> .typeSWAP4BYTE, @function
> SWAP4BYTE:
> stwu 1,-32(1)
> stw 31,28(1)
> mr 31,1
> stw 3,12(31)
> lwz 9,12(31)
> srwi 10,9,24
> lwz 9,12(31)
> srwi 9,9,8
> rlwinm 9,9,0,16,23
> or 10,10,9
> lwz 9,12(31)
> slwi 9,9,8
> rlwinm 9,9,0,8,15
> or 10,10,9
> lwz 9,12(31)
> slwi 9,9,24
> or 9,10,9
> mr 3,9
> addi 11,31,32
> lwz 31,-4(11)
> mr 1,11
> blr
> .sizeSWAP4BYTE,.-SWAP4BYTE
> .align 2
> .typebuiltin_SWAP4BYTE, @function
> builtin_SWAP4BYTE:
> stwu 1,-32(1)
> stw 31,28(1)
> mr 31,1
> stw 3,12(31)
> addi 10,31,12
> lwbrx 9,0,10
> mr 3,9
> addi 11,31,32
> lwz 31,-4(11)
> mr 1,11
> blr
> .sizebuiltin_SWAP4BYTE,.-builtin_SWAP4BYTE
> .align 2
> .globl main
> .typemain, @function
>
> The builtin function seems slightly more optimized.

Compile with optimization, like a real build. With -O2 it produces

Re: [Mesa-dev] [PATCH] Replace byte-swapping code with builtins in pack.c

2017-10-06 Thread Dylan Baker
Quoting Erik Faye-Lund (2017-10-06 00:31:20)
> On Thu, Oct 5, 2017 at 8:59 PM, Jochen Rollwagen  
> wrote:
> > Am 04.10.2017 um 05:59 schrieb Matt Turner:
> >>
> >> On Tue, Oct 3, 2017 at 11:01 AM, Jochen Rollwagen 
> >> wrote:
> >>>
> >>>  From 4cebe50a9bade6717923e104c954f3fad75f71bb Mon Sep 17 00:00:00 2001
> >>> From: Jochen Rollwagen 
> >>> Date: Tue, 3 Oct 2017 19:54:10 +0200
> >>> Subject: [PATCH] Replace byte-swapping code with builtins in pack.c
> >>>
> >>> This patch replaces some code for byte-swapping in pack.c with the
> >>> builtin
> >>> functions allowing the compiler to do its optimization magic
> >>> ---
> >>>   src/mesa/main/pack.c |   22 ++
> >>>   1 file changed, 2 insertions(+), 20 deletions(-)
> >>>
> >>> diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c
> >>> index 94a6d28..9bfde39 100644
> >>> --- a/src/mesa/main/pack.c
> >>> +++ b/src/mesa/main/pack.c
> >>> @@ -230,26 +230,8 @@ _mesa_pack_bitmap( GLint width, GLint height, const
> >>> GLubyte
> >>>   *source,
> >>>  }
> >>>   }
> >>>
> >>> -
> >>> -#define SWAP2BYTE(VALUE)\
> >>> -   {\
> >>> -  GLubyte *bytes = (GLubyte *) &(VALUE);\
> >>> -  GLubyte tmp = bytes[0];\
> >>> -  bytes[0] = bytes[1];\
> >>> -  bytes[1] = tmp;\
> >>> -   }
> >>> -
> >>> -#define SWAP4BYTE(VALUE)\
> >>> -   {\
> >>> -  GLubyte *bytes = (GLubyte *) &(VALUE);\
> >>> -  GLubyte tmp = bytes[0];\
> >>> -  bytes[0] = bytes[3];\
> >>> -  bytes[3] = tmp;\
> >>> -  tmp = bytes[1];\
> >>> -  bytes[1] = bytes[2];\
> >>> -  bytes[2] = tmp;\
> >>> -   }
> >>> -
> >>> +#define SWAP2BYTE(VALUE) __builtin_bswap16(VALUE)
> >>> +#define SWAP4BYTE(VALUE) __builtin_bswap32(VALUE)
> >>
> >> In my experience it's much simpler to just write these as
> >>
> >> return ((x & 0xff) << 8) | ((x >> 8) & 0xff);
> >>
> >> and
> >>
> >> return  ((x & 0xff) << 24) |
> >> ((x & 0xff00) << 8) |
> >> ((x & 0xff) >> 8) |
> >> ((x >> 24) & 0xff);
> >>
> >> and not have to deal with compiler intrinsics. Compilers will
> >> recognize these patterns and use the appropriate instructions (rol for
> >> 2-bytes and bswap for 4-bytes).
> >>
> >> You should be able to count the numbers of those instructions before
> >> and after such a patch to confirm it's working as expected.
> >
> > Fair enough. I've attached a new patch that optimizes the code on linux
> > systems only where there is byteswap.h containing (hopefully optimal)
> > functions. The other systems may be dealt with by a followup patch if
> > desired.
> >
> > From 327012410f75f77010b658ce9643a229c45bc308 Mon Sep 17 00:00:00 2001
> > From: Jochen Rollwagen 
> > Date: Thu, 5 Oct 2017 20:48:46 +0200
> > Subject: [PATCH] Simplify byte swapping code in pack.c on Linux systems
> >
> > This patch replaces the code for byte swapping in pack.c with the function
> > from
> > byteswap.h on Linux systems
> > ---
> >  src/mesa/main/pack.c |8 +++-
> >  1 file changed, 7 insertions(+), 1 deletion(-)
> >
> > diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c
> > index 94a6d28..d8ab267 100644
> > --- a/src/mesa/main/pack.c
> > +++ b/src/mesa/main/pack.c
> > @@ -230,6 +230,12 @@ _mesa_pack_bitmap( GLint width, GLint height, const
> > GLubyte
> >  *source,
> > }
> >  }
> >
> > +#ifdef __linux__
> > +#include 
> > +
> > +#define SWAP2BYTE(VALUE) bswap_16(VALUE)
> > +#define SWAP4BYTE(VALUE) bswap_32(VALUE)
> > +#else
> 
> Another alternative would be to use:
> 
> AX_GCC_BUILTIN / HAVE___BUILTIN_BSWAP{16,32}
> 
> ...to check explicitly for the builtins, like we do for other
> builtins. We already do this for __builtin_bswap32 and
> __builtin_bswap64, so it would seem like a pretty straight-forward
> extension of that pattern.

Not withstanding Matt and Nicolai's points:

We already check for bswap builtins in configure (and meson), and would be the
right way to guard this since this isn't really linux specific, it's gcc/clang
specific, and I *think* the BSD's would benefit as well.

Dylan


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Replace byte-swapping code with builtins in pack.c

2017-10-06 Thread Nicolai Hähnle

On 05.10.2017 20:59, Jochen Rollwagen wrote:

Am 04.10.2017 um 05:59 schrieb Matt Turner:
On Tue, Oct 3, 2017 at 11:01 AM, Jochen Rollwagen 
 wrote:

 From 4cebe50a9bade6717923e104c954f3fad75f71bb Mon Sep 17 00:00:00 2001
From: Jochen Rollwagen 
Date: Tue, 3 Oct 2017 19:54:10 +0200
Subject: [PATCH] Replace byte-swapping code with builtins in pack.c

This patch replaces some code for byte-swapping in pack.c with the 
builtin

functions allowing the compiler to do its optimization magic
---
  src/mesa/main/pack.c |   22 ++
  1 file changed, 2 insertions(+), 20 deletions(-)

diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c
index 94a6d28..9bfde39 100644
--- a/src/mesa/main/pack.c
+++ b/src/mesa/main/pack.c
@@ -230,26 +230,8 @@ _mesa_pack_bitmap( GLint width, GLint height, const
GLubyte
  *source,
 }
  }

-
-#define SWAP2BYTE(VALUE)    \
-   {    \
-  GLubyte *bytes = (GLubyte *) &(VALUE);    \
-  GLubyte tmp = bytes[0];    \
-  bytes[0] = bytes[1];    \
-  bytes[1] = tmp;    \
-   }
-
-#define SWAP4BYTE(VALUE)    \
-   {    \
-  GLubyte *bytes = (GLubyte *) &(VALUE);    \
-  GLubyte tmp = bytes[0];    \
-  bytes[0] = bytes[3];    \
-  bytes[3] = tmp;    \
-  tmp = bytes[1];    \
-  bytes[1] = bytes[2];    \
-  bytes[2] = tmp;    \
-   }
-
+#define SWAP2BYTE(VALUE) __builtin_bswap16(VALUE)
+#define SWAP4BYTE(VALUE) __builtin_bswap32(VALUE)

In my experience it's much simpler to just write these as

    return ((x & 0xff) << 8) | ((x >> 8) & 0xff);

and

    return  ((x & 0xff) << 24) |
    ((x & 0xff00) << 8) |
    ((x & 0xff) >> 8) |
    ((x >> 24) & 0xff);

and not have to deal with compiler intrinsics. Compilers will
recognize these patterns and use the appropriate instructions (rol for
2-bytes and bswap for 4-bytes).

You should be able to count the numbers of those instructions before
and after such a patch to confirm it's working as expected.
Fair enough. I've attached a new patch that optimizes the code on linux 
systems only where there is byteswap.h containing (hopefully optimal) 
functions. The other systems may be dealt with by a followup patch if 
desired.


This doesn't really address Matt's point that compilers are good at 
pattern matching byte swaps already.


Unless you can actually show with comparisons of the assembly and/or 
benchmarks that this results in better code, your patch makes the code 
base slightly worse because you've now added two different code paths 
where there was previously only one.


Cheers,
Nicolai




 From 327012410f75f77010b658ce9643a229c45bc308 Mon Sep 17 00:00:00 2001
From: Jochen Rollwagen 
Date: Thu, 5 Oct 2017 20:48:46 +0200
Subject: [PATCH] Simplify byte swapping code in pack.c on Linux systems

This patch replaces the code for byte swapping in pack.c with the 
function from

byteswap.h on Linux systems
---
  src/mesa/main/pack.c |    8 +++-
  1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c
index 94a6d28..d8ab267 100644
--- a/src/mesa/main/pack.c
+++ b/src/mesa/main/pack.c
@@ -230,6 +230,12 @@ _mesa_pack_bitmap( GLint width, GLint height, const 
GLubyte

  *source,
     }
  }

+#ifdef __linux__
+#include 
+
+#define SWAP2BYTE(VALUE) bswap_16(VALUE)
+#define SWAP4BYTE(VALUE) bswap_32(VALUE)
+#else

  #define SWAP2BYTE(VALUE)    \
     {    \
@@ -249,7 +255,7 @@ _mesa_pack_bitmap( GLint width, GLint height, const 
GLubyte

*source,
    bytes[1] = bytes[2];    \
    bytes[2] = tmp;    \
     }
-
+#endif

  static void
  extract_uint_indexes(GLuint n, GLuint indexes[],


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Replace byte-swapping code with builtins in pack.c

2017-10-06 Thread Erik Faye-Lund
On Thu, Oct 5, 2017 at 8:59 PM, Jochen Rollwagen  wrote:
> Am 04.10.2017 um 05:59 schrieb Matt Turner:
>>
>> On Tue, Oct 3, 2017 at 11:01 AM, Jochen Rollwagen 
>> wrote:
>>>
>>>  From 4cebe50a9bade6717923e104c954f3fad75f71bb Mon Sep 17 00:00:00 2001
>>> From: Jochen Rollwagen 
>>> Date: Tue, 3 Oct 2017 19:54:10 +0200
>>> Subject: [PATCH] Replace byte-swapping code with builtins in pack.c
>>>
>>> This patch replaces some code for byte-swapping in pack.c with the
>>> builtin
>>> functions allowing the compiler to do its optimization magic
>>> ---
>>>   src/mesa/main/pack.c |   22 ++
>>>   1 file changed, 2 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c
>>> index 94a6d28..9bfde39 100644
>>> --- a/src/mesa/main/pack.c
>>> +++ b/src/mesa/main/pack.c
>>> @@ -230,26 +230,8 @@ _mesa_pack_bitmap( GLint width, GLint height, const
>>> GLubyte
>>>   *source,
>>>  }
>>>   }
>>>
>>> -
>>> -#define SWAP2BYTE(VALUE)\
>>> -   {\
>>> -  GLubyte *bytes = (GLubyte *) &(VALUE);\
>>> -  GLubyte tmp = bytes[0];\
>>> -  bytes[0] = bytes[1];\
>>> -  bytes[1] = tmp;\
>>> -   }
>>> -
>>> -#define SWAP4BYTE(VALUE)\
>>> -   {\
>>> -  GLubyte *bytes = (GLubyte *) &(VALUE);\
>>> -  GLubyte tmp = bytes[0];\
>>> -  bytes[0] = bytes[3];\
>>> -  bytes[3] = tmp;\
>>> -  tmp = bytes[1];\
>>> -  bytes[1] = bytes[2];\
>>> -  bytes[2] = tmp;\
>>> -   }
>>> -
>>> +#define SWAP2BYTE(VALUE) __builtin_bswap16(VALUE)
>>> +#define SWAP4BYTE(VALUE) __builtin_bswap32(VALUE)
>>
>> In my experience it's much simpler to just write these as
>>
>> return ((x & 0xff) << 8) | ((x >> 8) & 0xff);
>>
>> and
>>
>> return  ((x & 0xff) << 24) |
>> ((x & 0xff00) << 8) |
>> ((x & 0xff) >> 8) |
>> ((x >> 24) & 0xff);
>>
>> and not have to deal with compiler intrinsics. Compilers will
>> recognize these patterns and use the appropriate instructions (rol for
>> 2-bytes and bswap for 4-bytes).
>>
>> You should be able to count the numbers of those instructions before
>> and after such a patch to confirm it's working as expected.
>
> Fair enough. I've attached a new patch that optimizes the code on linux
> systems only where there is byteswap.h containing (hopefully optimal)
> functions. The other systems may be dealt with by a followup patch if
> desired.
>
> From 327012410f75f77010b658ce9643a229c45bc308 Mon Sep 17 00:00:00 2001
> From: Jochen Rollwagen 
> Date: Thu, 5 Oct 2017 20:48:46 +0200
> Subject: [PATCH] Simplify byte swapping code in pack.c on Linux systems
>
> This patch replaces the code for byte swapping in pack.c with the function
> from
> byteswap.h on Linux systems
> ---
>  src/mesa/main/pack.c |8 +++-
>  1 file changed, 7 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c
> index 94a6d28..d8ab267 100644
> --- a/src/mesa/main/pack.c
> +++ b/src/mesa/main/pack.c
> @@ -230,6 +230,12 @@ _mesa_pack_bitmap( GLint width, GLint height, const
> GLubyte
>  *source,
> }
>  }
>
> +#ifdef __linux__
> +#include 
> +
> +#define SWAP2BYTE(VALUE) bswap_16(VALUE)
> +#define SWAP4BYTE(VALUE) bswap_32(VALUE)
> +#else

Another alternative would be to use:

AX_GCC_BUILTIN / HAVE___BUILTIN_BSWAP{16,32}

...to check explicitly for the builtins, like we do for other
builtins. We already do this for __builtin_bswap32 and
__builtin_bswap64, so it would seem like a pretty straight-forward
extension of that pattern.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Replace byte-swapping code with builtins in pack.c

2017-10-06 Thread Jochen Rollwagen

Am 04.10.2017 um 05:59 schrieb Matt Turner:

On Tue, Oct 3, 2017 at 11:01 AM, Jochen Rollwagen  wrote:

 From 4cebe50a9bade6717923e104c954f3fad75f71bb Mon Sep 17 00:00:00 2001
From: Jochen Rollwagen 
Date: Tue, 3 Oct 2017 19:54:10 +0200
Subject: [PATCH] Replace byte-swapping code with builtins in pack.c

This patch replaces some code for byte-swapping in pack.c with the builtin
functions allowing the compiler to do its optimization magic
---
  src/mesa/main/pack.c |   22 ++
  1 file changed, 2 insertions(+), 20 deletions(-)

diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c
index 94a6d28..9bfde39 100644
--- a/src/mesa/main/pack.c
+++ b/src/mesa/main/pack.c
@@ -230,26 +230,8 @@ _mesa_pack_bitmap( GLint width, GLint height, const
GLubyte
  *source,
 }
  }

-
-#define SWAP2BYTE(VALUE)\
-   {\
-  GLubyte *bytes = (GLubyte *) &(VALUE);\
-  GLubyte tmp = bytes[0];\
-  bytes[0] = bytes[1];\
-  bytes[1] = tmp;\
-   }
-
-#define SWAP4BYTE(VALUE)\
-   {\
-  GLubyte *bytes = (GLubyte *) &(VALUE);\
-  GLubyte tmp = bytes[0];\
-  bytes[0] = bytes[3];\
-  bytes[3] = tmp;\
-  tmp = bytes[1];\
-  bytes[1] = bytes[2];\
-  bytes[2] = tmp;\
-   }
-
+#define SWAP2BYTE(VALUE) __builtin_bswap16(VALUE)
+#define SWAP4BYTE(VALUE) __builtin_bswap32(VALUE)

In my experience it's much simpler to just write these as

return ((x & 0xff) << 8) | ((x >> 8) & 0xff);

and

return  ((x & 0xff) << 24) |
((x & 0xff00) << 8) |
((x & 0xff) >> 8) |
((x >> 24) & 0xff);

and not have to deal with compiler intrinsics. Compilers will
recognize these patterns and use the appropriate instructions (rol for
2-bytes and bswap for 4-bytes).

You should be able to count the numbers of those instructions before
and after such a patch to confirm it's working as expected.
Fair enough. I've attached a new patch that optimizes the code on linux 
systems only where there is byteswap.h containing (hopefully optimal) 
functions. The other systems may be dealt with by a followup patch if 
desired.


From 327012410f75f77010b658ce9643a229c45bc308 Mon Sep 17 00:00:00 2001
From: Jochen Rollwagen 
Date: Thu, 5 Oct 2017 20:48:46 +0200
Subject: [PATCH] Simplify byte swapping code in pack.c on Linux systems

This patch replaces the code for byte swapping in pack.c with the 
function from

byteswap.h on Linux systems
---
 src/mesa/main/pack.c |8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c
index 94a6d28..d8ab267 100644
--- a/src/mesa/main/pack.c
+++ b/src/mesa/main/pack.c
@@ -230,6 +230,12 @@ _mesa_pack_bitmap( GLint width, GLint height, const 
GLubyte

 *source,
}
 }

+#ifdef __linux__
+#include 
+
+#define SWAP2BYTE(VALUE) bswap_16(VALUE)
+#define SWAP4BYTE(VALUE) bswap_32(VALUE)
+#else

 #define SWAP2BYTE(VALUE)\
{\
@@ -249,7 +255,7 @@ _mesa_pack_bitmap( GLint width, GLint height, const 
GLubyte

*source,
   bytes[1] = bytes[2];\
   bytes[2] = tmp;\
}
-
+#endif

 static void
 extract_uint_indexes(GLuint n, GLuint indexes[],
--
1.7.9.5

Cheers

Jochen
>From 327012410f75f77010b658ce9643a229c45bc308 Mon Sep 17 00:00:00 2001
From: Jochen Rollwagen 
Date: Thu, 5 Oct 2017 20:48:46 +0200
Subject: [PATCH] Simplify byte swapping code in pack.c on Linux systems

This patch replaces the code for byte swapping in pack.c with the function from byteswap.h on Linux systems
---
 src/mesa/main/pack.c |8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c
index 94a6d28..d8ab267 100644
--- a/src/mesa/main/pack.c
+++ b/src/mesa/main/pack.c
@@ -230,6 +230,12 @@ _mesa_pack_bitmap( GLint width, GLint height, const GLubyte *source,
}
 }
 
+#ifdef __linux__
+#include 
+
+#define SWAP2BYTE(VALUE) bswap_16(VALUE)
+#define SWAP4BYTE(VALUE) bswap_32(VALUE)
+#else
 
 #define SWAP2BYTE(VALUE)			\
{		\
@@ -249,7 +255,7 @@ _mesa_pack_bitmap( GLint width, GLint height, const GLubyte *source,
   bytes[1] = bytes[2];			\
   bytes[2] = tmp;\
}
-
+#endif
 
 static void
 extract_uint_indexes(GLuint n, GLuint indexes[],
-- 
1.7.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Replace byte-swapping code with builtins in pack.c

2017-10-03 Thread Matt Turner
On Tue, Oct 3, 2017 at 11:01 AM, Jochen Rollwagen  wrote:
> From 4cebe50a9bade6717923e104c954f3fad75f71bb Mon Sep 17 00:00:00 2001
> From: Jochen Rollwagen 
> Date: Tue, 3 Oct 2017 19:54:10 +0200
> Subject: [PATCH] Replace byte-swapping code with builtins in pack.c
>
> This patch replaces some code for byte-swapping in pack.c with the builtin
> functions allowing the compiler to do its optimization magic
> ---
>  src/mesa/main/pack.c |   22 ++
>  1 file changed, 2 insertions(+), 20 deletions(-)
>
> diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c
> index 94a6d28..9bfde39 100644
> --- a/src/mesa/main/pack.c
> +++ b/src/mesa/main/pack.c
> @@ -230,26 +230,8 @@ _mesa_pack_bitmap( GLint width, GLint height, const
> GLubyte
>  *source,
> }
>  }
>
> -
> -#define SWAP2BYTE(VALUE)\
> -   {\
> -  GLubyte *bytes = (GLubyte *) &(VALUE);\
> -  GLubyte tmp = bytes[0];\
> -  bytes[0] = bytes[1];\
> -  bytes[1] = tmp;\
> -   }
> -
> -#define SWAP4BYTE(VALUE)\
> -   {\
> -  GLubyte *bytes = (GLubyte *) &(VALUE);\
> -  GLubyte tmp = bytes[0];\
> -  bytes[0] = bytes[3];\
> -  bytes[3] = tmp;\
> -  tmp = bytes[1];\
> -  bytes[1] = bytes[2];\
> -  bytes[2] = tmp;\
> -   }
> -
> +#define SWAP2BYTE(VALUE) __builtin_bswap16(VALUE)
> +#define SWAP4BYTE(VALUE) __builtin_bswap32(VALUE)

In my experience it's much simpler to just write these as

   return ((x & 0xff) << 8) | ((x >> 8) & 0xff);

and

   return  ((x & 0xff) << 24) |
   ((x & 0xff00) << 8) |
   ((x & 0xff) >> 8) |
   ((x >> 24) & 0xff);

and not have to deal with compiler intrinsics. Compilers will
recognize these patterns and use the appropriate instructions (rol for
2-bytes and bswap for 4-bytes).

You should be able to count the numbers of those instructions before
and after such a patch to confirm it's working as expected.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Replace byte-swapping code with builtins in pack.c

2017-10-03 Thread Brian Paul

On 10/03/2017 12:01 PM, Jochen Rollwagen wrote:

 From 4cebe50a9bade6717923e104c954f3fad75f71bb Mon Sep 17 00:00:00 2001
From: Jochen Rollwagen 
Date: Tue, 3 Oct 2017 19:54:10 +0200
Subject: [PATCH] Replace byte-swapping code with builtins in pack.c

This patch replaces some code for byte-swapping in pack.c with the
builtin functions allowing the compiler to do its optimization magic
---
  src/mesa/main/pack.c |   22 ++
  1 file changed, 2 insertions(+), 20 deletions(-)

diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c
index 94a6d28..9bfde39 100644
--- a/src/mesa/main/pack.c
+++ b/src/mesa/main/pack.c
@@ -230,26 +230,8 @@ _mesa_pack_bitmap( GLint width, GLint height, const
GLubyte
  *source,
 }
  }

-
-#define SWAP2BYTE(VALUE)\
-   {\
-  GLubyte *bytes = (GLubyte *) &(VALUE);\
-  GLubyte tmp = bytes[0];\
-  bytes[0] = bytes[1];\
-  bytes[1] = tmp;\
-   }
-
-#define SWAP4BYTE(VALUE)\
-   {\
-  GLubyte *bytes = (GLubyte *) &(VALUE);\
-  GLubyte tmp = bytes[0];\
-  bytes[0] = bytes[3];\
-  bytes[3] = tmp;\
-  tmp = bytes[1];\
-  bytes[1] = bytes[2];\
-  bytes[2] = tmp;\
-   }
-
+#define SWAP2BYTE(VALUE) __builtin_bswap16(VALUE)
+#define SWAP4BYTE(VALUE) __builtin_bswap32(VALUE)


Looks like a gcc feature.  We also need to build with other compilers 
like Microsoft's.


-Brian



  static void
  extract_uint_indexes(GLuint n, GLuint indexes[],


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] Replace byte-swapping code with builtins in pack.c

2017-10-03 Thread Jochen Rollwagen

From 4cebe50a9bade6717923e104c954f3fad75f71bb Mon Sep 17 00:00:00 2001
From: Jochen Rollwagen 
Date: Tue, 3 Oct 2017 19:54:10 +0200
Subject: [PATCH] Replace byte-swapping code with builtins in pack.c

This patch replaces some code for byte-swapping in pack.c with the 
builtin functions allowing the compiler to do its optimization magic

---
 src/mesa/main/pack.c |   22 ++
 1 file changed, 2 insertions(+), 20 deletions(-)

diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c
index 94a6d28..9bfde39 100644
--- a/src/mesa/main/pack.c
+++ b/src/mesa/main/pack.c
@@ -230,26 +230,8 @@ _mesa_pack_bitmap( GLint width, GLint height, const 
GLubyte

 *source,
}
 }

-
-#define SWAP2BYTE(VALUE)\
-   {\
-  GLubyte *bytes = (GLubyte *) &(VALUE);\
-  GLubyte tmp = bytes[0];\
-  bytes[0] = bytes[1];\
-  bytes[1] = tmp;\
-   }
-
-#define SWAP4BYTE(VALUE)\
-   {\
-  GLubyte *bytes = (GLubyte *) &(VALUE);\
-  GLubyte tmp = bytes[0];\
-  bytes[0] = bytes[3];\
-  bytes[3] = tmp;\
-  tmp = bytes[1];\
-  bytes[1] = bytes[2];\
-  bytes[2] = tmp;\
-   }
-
+#define SWAP2BYTE(VALUE) __builtin_bswap16(VALUE)
+#define SWAP4BYTE(VALUE) __builtin_bswap32(VALUE)

 static void
 extract_uint_indexes(GLuint n, GLuint indexes[],
--
1.7.9.5

>From 4cebe50a9bade6717923e104c954f3fad75f71bb Mon Sep 17 00:00:00 2001
From: Jochen Rollwagen 
Date: Tue, 3 Oct 2017 19:54:10 +0200
Subject: [PATCH] Replace byte-swapping code with builtins in pack.c

This patch replaces some code for byte-swapping in pack.c with the builtin functions
allowing the compiler to do its optimization magic
---
 src/mesa/main/pack.c |   22 ++
 1 file changed, 2 insertions(+), 20 deletions(-)

diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c
index 94a6d28..9bfde39 100644
--- a/src/mesa/main/pack.c
+++ b/src/mesa/main/pack.c
@@ -230,26 +230,8 @@ _mesa_pack_bitmap( GLint width, GLint height, const GLubyte *source,
}
 }
 
-
-#define SWAP2BYTE(VALUE)			\
-   {		\
-  GLubyte *bytes = (GLubyte *) &(VALUE);	\
-  GLubyte tmp = bytes[0];			\
-  bytes[0] = bytes[1];			\
-  bytes[1] = tmp;\
-   }
-
-#define SWAP4BYTE(VALUE)			\
-   {		\
-  GLubyte *bytes = (GLubyte *) &(VALUE);	\
-  GLubyte tmp = bytes[0];			\
-  bytes[0] = bytes[3];			\
-  bytes[3] = tmp;\
-  tmp = bytes[1];\
-  bytes[1] = bytes[2];			\
-  bytes[2] = tmp;\
-   }
-
+#define SWAP2BYTE(VALUE) __builtin_bswap16(VALUE)
+#define SWAP4BYTE(VALUE) __builtin_bswap32(VALUE)
 
 static void
 extract_uint_indexes(GLuint n, GLuint indexes[],
-- 
1.7.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev