================
@@ -211,6 +211,58 @@ static Value *handleElementwiseF16ToF32(CodeGenFunction 
&CGF,
   llvm_unreachable("Intrinsic F16ToF32 not supported by target architecture");
 }
 
+static Value *handleElementwiseF32ToF16(CodeGenFunction &CGF,
+                                        const CallExpr *E) {
+  Value *Op0 = CGF.EmitScalarExpr(E->getArg(0));
+  QualType Op0Ty = E->getArg(0)->getType();
+  llvm::Type *ResType = CGF.IntTy;
+  uint64_t NumElements = 0;
+  if (Op0->getType()->isVectorTy()) {
+    NumElements =
+        E->getArg(0)->getType()->castAs<clang::VectorType>()->getNumElements();
+    ResType =
+        llvm::VectorType::get(ResType, ElementCount::getFixed(NumElements));
+  }
+  if (!Op0Ty->hasFloatingRepresentation())
+    llvm_unreachable("f32tof16 operand must have a float representation");
+
+  if (CGF.CGM.getTriple().isDXIL())
+    return CGF.Builder.CreateIntrinsic(ResType, Intrinsic::dx_legacyf32tof16,
+                                       ArrayRef<Value *>{Op0}, nullptr,
+                                       "hlsl.f32tof16");
+
+  if (CGF.CGM.getTriple().isSPIRV()) {
+    // We use the SPIRV PackHalf2x16 operation to avoid the need for the
+    // Int16 and Float16 capabilities
+    auto PackType =
+        llvm::VectorType::get(CGF.FloatTy, ElementCount::getFixed(2));
+    if (NumElements == 0) {
+      // a scalar input - simply insert the scalar in the first element
+      // of the 2 element float vector
+      Value *Float2 = Constant::getNullValue(PackType);
+      Float2 = CGF.Builder.CreateInsertElement(Float2, Op0, (uint64_t)0);
+      Value *Result = CGF.Builder.CreateIntrinsic(
+          ResType, Intrinsic::spv_packhalf2x16, ArrayRef<Value *>{Float2});
+      return Result;
+    } else {
+      // a vector input - build a congruent output vector by iterating through
+      // the input vector calling packhalf2x16 for each element
+      Value *Result = PoisonValue::get(ResType);
+      for (uint64_t i = 0; i < NumElements; i++) {
----------------
farzonl wrote:

if you use vscode, you can install the clangd for vscode extention which will 
let you apply clang-tidy linter suggestions like this.

https://github.com/llvm/llvm-project/pull/172469
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to