On Mon, Mar 27, 2017 at 03:11:03PM -0700, Eric Anholt wrote: > The clever pointer tricks were actually not working, and we were doing > the byte-by-byte moves in general. By just doing the memcpy and > obvious byte swap code, we end up generating actual byte swap > instructions, thanks to optimizing compilers. > > text data bss dec hex filename > before: 2241932 51584 132016 2425532 2502bc hw/xfree86/Xorg > after: 2216276 51584 132016 2399876 249e84 hw/xfree86/Xorg > --- > doc/Xserver-spec.xml | 2 +- > glx/glxbyteorder.h | 21 ------------ > include/misc.h | 97 > ++++++++++++++++++---------------------------------- > os/io.c | 4 +-- > 4 files changed, 37 insertions(+), 87 deletions(-) > > diff --git a/doc/Xserver-spec.xml b/doc/Xserver-spec.xml > index 7867544e48a9..3dde65178b7f 100644 > --- a/doc/Xserver-spec.xml > +++ b/doc/Xserver-spec.xml > @@ -600,7 +600,7 @@ are: REQUEST, REQUEST_SIZE_MATCH, REQUEST_AT_LEAST_SIZE, > REQUEST_FIXED_SIZE, LEGAL_NEW_RESOURCE, and > VALIDATE_DRAWABLE_AND_GC. Useful byte swapping macros can be found > in Xserver/include/dix.h: WriteReplyToClient and WriteSwappedDataToClient; > and > -in Xserver/include/misc.h: lswapl, lswaps, LengthRestB, LengthRestS, > +in Xserver/include/misc.h: bswap_64, bswap_32, bswap_16, LengthRestB, > LengthRestS, > LengthRestL, SwapRestS, SwapRestL, swapl, swaps, cpswapl, and cpswaps.</para> > </section> > </section> > diff --git a/glx/glxbyteorder.h b/glx/glxbyteorder.h > index 5e94e8626e66..8f0cd8a4b0d7 100644 > --- a/glx/glxbyteorder.h > +++ b/glx/glxbyteorder.h > @@ -37,25 +37,4 @@ > > #include "misc.h" > > -static inline uint16_t > -bswap_16(uint16_t val) > -{ > - swap_uint16(&val); > - return val; > -} > - > -static inline uint32_t > -bswap_32(uint32_t val) > -{ > - swap_uint32(&val); > - return val; > -} > - > -static inline uint64_t > -bswap_64(uint64_t val) > -{ > - swap_uint64(&val); > - return val; > -} > - > #endif /* !defined(__GLXBYTEORDER_H__) */ > diff --git a/include/misc.h b/include/misc.h > index 01747fd38339..a75eb617c642 100644 > --- a/include/misc.h > +++ b/include/misc.h > @@ -128,21 +128,6 @@ typedef struct _xReq *xReqPtr; > #define USE_BACKGROUND_PIXEL 3 > #define USE_BORDER_PIXEL 3 > > -/* byte swap a 32-bit literal */ > -static inline uint32_t > -lswapl(uint32_t x) > -{ > - return ((x & 0xff) << 24) | > - ((x & 0xff00) << 8) | ((x & 0xff0000) >> 8) | ((x >> 24) & 0xff); > -} > - > -/* byte swap a 16-bit literal */ > -static inline uint16_t > -lswaps(uint16_t x) > -{ > - return (uint16_t)((x & 0xff) << 8) | ((x >> 8) & 0xff); > -} > - > #undef min > #undef max > > @@ -311,88 +296,74 @@ __builtin_constant_p(int x) > } > #endif > > -/* byte swap a 64-bit value */ > -static inline void > -swap_uint64(uint64_t *x) > +static inline uint64_t > +bswap_64(uint64_t x) > { > - char n; > - > - n = ((char *) x)[0]; > - ((char *) x)[0] = ((char *) x)[7]; > - ((char *) x)[7] = n; > - > - n = ((char *) x)[1]; > - ((char *) x)[1] = ((char *) x)[6]; > - ((char *) x)[6] = n; > - > - n = ((char *) x)[2]; > - ((char *) x)[2] = ((char *) x)[5]; > - ((char *) x)[5] = n; > - > - n = ((char *) x)[3]; > - ((char *) x)[3] = ((char *) x)[4]; > - ((char *) x)[4] = n; > + return (((x & 0xFF00000000000000ull) >> 56) | > + ((x & 0x00FF000000000000ull) >> 40) | > + ((x & 0x0000FF0000000000ull) >> 24) | > + ((x & 0x000000FF00000000ull) >> 8) | > + ((x & 0x00000000FF000000ull) << 8) | > + ((x & 0x0000000000FF0000ull) << 24) | > + ((x & 0x000000000000FF00ull) << 40) | > + ((x & 0x00000000000000FFull) << 56)); > } > > #define swapll(x) do { \ > + uint64_t temp; \ > if (sizeof(*(x)) != 8) \ > wrong_size(); \ > - swap_uint64((uint64_t *)(x)); \ > + memcpy(&temp, x, 8); \ > + temp = bswap_64(temp); \ > + memcpy(x, &temp, 8); \
is the memcpy really needed here? am I missing something? We're passing by value into bswap_64() now, so we get the copy for free anyway. Same in the swapl below. fwiww, test/misc.c would be trivially amended to test all of these macros for correctness :) Cheers, Peter > } while (0) > > -/* byte swap a 32-bit value */ > -static inline void > -swap_uint32(uint32_t * x) > +static inline uint32_t > +bswap_32(uint32_t x) > { > - char n = ((char *) x)[0]; > - > - ((char *) x)[0] = ((char *) x)[3]; > - ((char *) x)[3] = n; > - n = ((char *) x)[1]; > - ((char *) x)[1] = ((char *) x)[2]; > - ((char *) x)[2] = n; > + return (((x & 0xFF000000) >> 24) | > + ((x & 0x00FF0000) >> 8) | > + ((x & 0x0000FF00) << 8) | > + ((x & 0x000000FF) << 24)); > } > > #define swapl(x) do { \ > + uint32_t temp; \ > if (sizeof(*(x)) != 4) \ > wrong_size(); \ > - if (__builtin_constant_p((uintptr_t)(x) & 3) && ((uintptr_t)(x) > & 3) == 0) \ > - *(x) = lswapl(*(x)); \ > - else \ > - swap_uint32((uint32_t *)(x)); \ > + memcpy(&temp, x, 4); \ > + temp = bswap_32(temp); \ > + memcpy(x, &temp, 4); \ > } while (0) > > -/* byte swap a 16-bit value */ > -static inline void > -swap_uint16(uint16_t * x) > +static inline uint16_t > +bswap_16(uint16_t x) > { > - char n = ((char *) x)[0]; > - > - ((char *) x)[0] = ((char *) x)[1]; > - ((char *) x)[1] = n; > + return (((x & 0xFF00) >> 8) | > + ((x & 0x00FF) << 8)); > } > > #define swaps(x) do { \ > + uint16_t temp; \ > if (sizeof(*(x)) != 2) \ > wrong_size(); \ > - if (__builtin_constant_p((uintptr_t)(x) & 1) && ((uintptr_t)(x) > & 1) == 0) \ > - *(x) = lswaps(*(x)); \ > - else \ > - swap_uint16((uint16_t *)(x)); \ > + memcpy(&temp, x, 2); \ > + temp = bswap_16(temp); \ > + memcpy(x, &temp, 2); \ > } while (0) > > /* copy 32-bit value from src to dst byteswapping on the way */ > #define cpswapl(src, dst) do { \ > if (sizeof((src)) != 4 || sizeof((dst)) != 4) \ > wrong_size(); \ > - (dst) = lswapl((src)); \ > + (dst) = bswap_32((src)); \ > } while (0) > > /* copy short from src to dst byteswapping on the way */ > #define cpswaps(src, dst) do { \ > if (sizeof((src)) != 2 || sizeof((dst)) != 2) \ > wrong_size(); \ > - (dst) = lswaps((src)); \ > + (dst) = bswap_16((src)); \ > } while (0) > > extern _X_EXPORT void SwapLongs(CARD32 *list, unsigned long count); > diff --git a/os/io.c b/os/io.c > index 8aa51a1070f1..46c7e23715ff 100644 > --- a/os/io.c > +++ b/os/io.c > @@ -108,12 +108,12 @@ static ConnectionOutputPtr FreeOutputs = > (ConnectionOutputPtr) NULL; > static OsCommPtr AvailableInput = (OsCommPtr) NULL; > > #define get_req_len(req,cli) ((cli)->swapped ? \ > - lswaps((req)->length) : (req)->length) > + bswap_16((req)->length) : (req)->length) > > #include <X11/extensions/bigreqsproto.h> > > #define get_big_req_len(req,cli) ((cli)->swapped ? \ > - lswapl(((xBigReq *)(req))->length) : \ > + bswap_32(((xBigReq *)(req))->length) : \ > ((xBigReq *)(req))->length) > > #define BUFSIZE 16384 > -- > 2.11.0 > > _______________________________________________ > xorg-devel@lists.x.org: X.Org development > Archives: http://lists.x.org/archives/xorg-devel > Info: https://lists.x.org/mailman/listinfo/xorg-devel > _______________________________________________ xorg-devel@lists.x.org: X.Org development Archives: http://lists.x.org/archives/xorg-devel Info: https://lists.x.org/mailman/listinfo/xorg-devel