Re: r229348 - [X86] Teach clang to lower __builtin_ia32_psrldqi256 and __builtin_ia32_pslldqi256 to vector shuffles the backend recognizes. This is a step towards removing the corresponding intrinsics from the backend.

Craig Topper Sun, 15 Feb 2015 17:12:46 -0800

Possibly. Lane splits make it a lot uglier than SSE2. I'm also somewhat
concerned about the complete loss of size checking on the immediate. For
instance, 256 aliases to 0 in the SSE2 case now. I hadn't thought about that
when I reviewed your patch.


On Sun, Feb 15, 2015 at 4:56 PM, Filipe Cabecinhas <[email protected]> wrote:

> Hi Craig,
>
> Could this be done in the headers, like the 128-bit ones?
> That way we could get rid of the builtins, no?
>
> Thanks,
>
>   Filipe
>
> On Sun, Feb 15, 2015 at 4:42 PM, Craig Topper <[email protected]>
> wrote:
>
>> Author: ctopper
>> Date: Sun Feb 15 18:42:49 2015
>> New Revision: 229348
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=229348&view=rev
>> Log:
>> [X86] Teach clang to lower __builtin_ia32_psrldqi256 and
>> __builtin_ia32_pslldqi256 to vector shuffles the backend recognizes. This
>> is a step towards removing the corresponding intrinsics from the backend.
>>
>> Modified:
>>     cfe/trunk/lib/CodeGen/CGBuiltin.cpp
>>     cfe/trunk/test/CodeGen/avx2-builtins.c
>>
>> Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=229348&r1=229347&r2=229348&view=diff
>>
>> ==============================================================================
>> --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
>> +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Sun Feb 15 18:42:49 2015
>> @@ -6025,6 +6025,60 @@ Value *CodeGenFunction::EmitX86BuiltinEx
>>      // If palignr is shifting the pair of vectors more than 32 bytes,
>> emit zero.
>>      return llvm::Constant::getNullValue(ConvertType(E->getType()));
>>    }
>> +  case X86::BI__builtin_ia32_pslldqi256: {
>> +    // Shift value is in bits so divide by 8.
>> +    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue()
>> >> 3;
>> +
>> +    // If pslldq is shifting the vector more than 15 bytes, emit zero.
>> +    if (shiftVal >= 16)
>> +      return llvm::Constant::getNullValue(ConvertType(E->getType()));
>> +
>> +    SmallVector<llvm::Constant*, 32> Indices;
>> +    // 256-bit pslldq operates on 128-bit lanes so we need to handle that
>> +    for (unsigned l = 0; l != 32; l += 16) {
>> +      for (unsigned i = 0; i != 16; ++i) {
>> +        unsigned Idx = 32 + i - shiftVal;
>> +        if (Idx < 32) Idx -= 16; // end of lane, switch operand.
>> +        Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx + l));
>> +      }
>> +    }
>> +
>> +    llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32);
>> +    Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
>> +    Value *Zero = llvm::Constant::getNullValue(VecTy);
>> +
>> +    Value *SV = llvm::ConstantVector::get(Indices);
>> +    SV = Builder.CreateShuffleVector(Zero, Ops[0], SV, "pslldq");
>> +    llvm::Type *ResultType = ConvertType(E->getType());
>> +    return Builder.CreateBitCast(SV, ResultType, "cast");
>> +  }
>> +  case X86::BI__builtin_ia32_psrldqi256: {
>> +    // Shift value is in bits so divide by 8.
>> +    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue()
>> >> 3;
>> +
>> +    // If psrldq is shifting the vector more than 15 bytes, emit zero.
>> +    if (shiftVal >= 16)
>> +      return llvm::Constant::getNullValue(ConvertType(E->getType()));
>> +
>> +    SmallVector<llvm::Constant*, 32> Indices;
>> +    // 256-bit psrldq operates on 128-bit lanes so we need to handle that
>> +    for (unsigned l = 0; l != 32; l += 16) {
>> +      for (unsigned i = 0; i != 16; ++i) {
>> +        unsigned Idx = i + shiftVal;
>> +        if (Idx >= 16) Idx += 16; // end of lane, switch operand.
>> +        Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx + l));
>> +      }
>> +    }
>> +
>> +    llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32);
>> +    Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
>> +    Value *Zero = llvm::Constant::getNullValue(VecTy);
>> +
>> +    Value *SV = llvm::ConstantVector::get(Indices);
>> +    SV = Builder.CreateShuffleVector(Ops[0], Zero, SV, "psrldq");
>> +    llvm::Type *ResultType = ConvertType(E->getType());
>> +    return Builder.CreateBitCast(SV, ResultType, "cast");
>> +  }
>>    case X86::BI__builtin_ia32_movntps:
>>    case X86::BI__builtin_ia32_movntps256:
>>    case X86::BI__builtin_ia32_movntpd:
>>
>> Modified: cfe/trunk/test/CodeGen/avx2-builtins.c
>> URL:
>> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx2-builtins.c?rev=229348&r1=229347&r2=229348&view=diff
>>
>> ==============================================================================
>> --- cfe/trunk/test/CodeGen/avx2-builtins.c (original)
>> +++ cfe/trunk/test/CodeGen/avx2-builtins.c Sun Feb 15 18:42:49 2015
>> @@ -462,7 +462,7 @@ __m256i test_mm256_sign_epi32(__m256i a,
>>  }
>>
>>  __m256i test_mm256_slli_si256(__m256i a) {
>> -  // CHECK: @llvm.x86.avx2.psll.dq
>> +  // CHECK: shufflevector <32 x i8> zeroinitializer, <32 x i8> %{{.*}},
>> <32 x i32> <i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36,
>> i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 29, i32
>> 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55,
>> i32 56, i32 57, i32 58, i32 59, i32 60>
>>    return _mm256_slli_si256(a, 3);
>>  }
>>
>> @@ -517,7 +517,7 @@ __m256i test_mm256_sra_epi32(__m256i a,
>>  }
>>
>>  __m256i test_mm256_srli_si256(__m256i a) {
>> -  // CHECK: @llvm.x86.avx2.psrl.dq
>> +  // CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> zeroinitializer,
>> <32 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32
>> 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 19, i32 20,
>> i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32
>> 30, i32 31, i32 48, i32 49, i32 50>
>>    return _mm256_srli_si256(a, 3);
>>  }
>>
>>
>>
>> _______________________________________________
>> cfe-commits mailing list
>> [email protected]
>> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
>>
>
>


-- 
~Craig

_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Re: r229348 - [X86] Teach clang to lower __builtin_ia32_psrldqi256 and __builtin_ia32_pslldqi256 to vector shuffles the backend recognizes. This is a step towards removing the corresponding intrinsics from the backend.

Reply via email to