Bis repetita placent.
Since PR 45294 got reclassified as a duplicate of PR 41323 and
a) chances for a new GCC-specific intrinsic are nil
b) unlike H.J. Lu, I couldn't care less about ICC
c) I think GCC ought to do the Right Thing
d) or at least have builtins match hardware (because inline asm gets no
scheduling love)
I'm reporting the same undue extensions, this time for the whole class of
extraction ops.
$ cat pextr.cc
#include <smmintrin.h>
typedef long unsigned int uint64_t;
uint64_t foo8(__m128i x) { return _mm_extract_epi8(x, 4); }
uint64_t foo16(__m128i x) { return _mm_extract_epi16(x, 3); }
uint64_t foo32(__m128i x) { return _mm_extract_epi32(x, 2); }
uint64_t bar8(__v16qi x) { return __builtin_ia32_vec_ext_v16qi(x, 4); }
uint64_t bar16(__v8hi x) { return __builtin_ia32_vec_ext_v8hi(x, 3); }
uint64_t bar32(__v4si x) { return __builtin_ia32_vec_ext_v4si(x, 2); }
int main() { return 0; }
$ /usr/local/gcc-4.6-20100811/bin/g++ -O3 -march=native pextr.cc
00000000004004a0 <_Z4foo8Dv2_x>:
  4004a0:       66 0f 3a 14 c0 04       pextrb $0x4,%xmm0,%eax
  4004a6:       0f be c0                movsbl %al,%eax
  4004a9:       48 98                   cltq   
  4004ab:       c3                      retq   

00000000004004b0 <_Z5foo16Dv2_x>:
  4004b0:       66 0f c5 c0 03          pextrw $0x3,%xmm0,%eax
  4004b5:       98                      cwtl   
  4004b6:       48 98                   cltq   
  4004b8:       c3                      retq   

00000000004004c0 <_Z5foo32Dv2_x>:
  4004c0:       66 0f 3a 16 c0 02       pextrd $0x2,%xmm0,%eax
  4004c6:       48 98                   cltq   
  4004c8:       c3                      retq   

00000000004004d0 <_Z4bar8Dv16_c>:
  4004d0:       66 0f 3a 14 c0 04       pextrb $0x4,%xmm0,%eax
  4004d6:       48 0f be c0             movsbq %al,%rax
  4004da:       c3                      retq   

00000000004004e0 <_Z5bar16Dv8_s>:
  4004e0:       66 0f c5 c0 03          pextrw $0x3,%xmm0,%eax
  4004e5:       48 0f bf c0             movswq %ax,%rax
  4004e9:       c3                      retq   

00000000004004f0 <_Z5bar32Dv4_i>:
  4004f0:       66 0f 3a 16 c0 02       pextrd $0x2,%xmm0,%eax
  4004f6:       48 98                   cltq   
  4004f8:       c3                      retq   

x86-64, Intel i7.


-- 
           Summary: pextr{b,w,d}, (worse than) redundant extensions
           Product: gcc
           Version: 4.6.0
            Status: UNCONFIRMED
          Severity: enhancement
          Priority: P3
         Component: target
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: tbptbp at gmail dot com


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=45336

Reply via email to