[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)

2024-05-20 Thread via cfe-commits

github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. :warning:



You can test this locally with the following command:


```bash
git-clang-format --diff e76b257483e6c6743de0fa6eca4d0cc60e08385d \
  db1933033fd37bbbab0b845eed53405db365b0e6 -- \
  clang/lib/CodeGen/CGBuiltin.cpp \
  llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp \
  llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp \
  llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h \
  llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp \
  llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h \
  llvm/lib/Target/AMDGPU/SIISelLowering.cpp
```





View the diff from clang-format here.


```diff
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index a0f949495e..9ce2f5b6c1 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18482,9 +18482,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_permlane16:
   case AMDGPU::BI__builtin_amdgcn_permlanex16: {
 Intrinsic::ID IID;
-IID = BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16 
-   ? Intrinsic::amdgcn_permlane16
-   : Intrinsic::amdgcn_permlanex16;
+IID = BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
+  ? Intrinsic::amdgcn_permlane16
+  : Intrinsic::amdgcn_permlanex16;
 
 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index cc4797b42d..b28c3521d6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -5416,10 +5416,12 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
   Register Src3 = MI.getOperand(5).getReg();
   Register Src4 = MI.getOperand(6).getImm();
   Register Src5 = MI.getOperand(7).getImm();
-  return LaneOp.addUse(Src1).addUse(Src2).
-addUse(Src3).
-addImm(Src4).
-addImm(Src5).getReg(0);
+  return LaneOp.addUse(Src1)
+  .addUse(Src2)
+  .addUse(Src3)
+  .addImm(Src4)
+  .addImm(Src5)
+  .getReg(0);
 }
 default:
   llvm_unreachable("unhandled lane op");
@@ -5427,7 +5429,8 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
   };
 
   Register Src1, Src2;
-  if (IID == Intrinsic::amdgcn_readlane || IID == Intrinsic::amdgcn_writelane || IsPermLane16) {
+  if (IID == Intrinsic::amdgcn_readlane || IID == Intrinsic::amdgcn_writelane ||
+  IsPermLane16) {
 Src1 = MI.getOperand(3).getReg();
 if (IID == Intrinsic::amdgcn_writelane || IsPermLane16) {
   Src2 = MI.getOperand(4).getReg();
@@ -5514,9 +5517,7 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
 Src0 = IsS16Vec ? B.buildBitcast(S32, Src0Parts.getReg(i)).getReg(0)
 : Src0Parts.getReg(i);
 PartialRes.push_back(
-(B.buildIntrinsic(IID, {S32})
- .addUse(Src0)
- .getReg(0)));
+(B.buildIntrinsic(IID, {S32}).addUse(Src0).getReg(0)));
   }
 
   break;
@@ -5526,7 +5527,7 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
 case Intrinsic::amdgcn_permlanex16: {
   Register Src1 = MI.getOperand(3).getReg();
   Register Src2 = MI.getOperand(4).getReg();
-  
+
   Register SrcX = IsPermLane16 ? Src1 : Src2;
   MachineInstrBuilder SrcXParts;
 
@@ -5547,9 +5548,8 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
 : Src0Parts.getReg(i);
 SrcX = IsS16Vec ? B.buildBitcast(S32, SrcXParts.getReg(i)).getReg(0)
 : SrcXParts.getReg(i);
-PartialRes.push_back( IsPermLane16 ?
-createLaneOp(Src0, SrcX, Src2) : 
-createLaneOp(Src0, Src1, SrcX));
+PartialRes.push_back(IsPermLane16 ? createLaneOp(Src0, SrcX, Src2)
+  : createLaneOp(Src0, Src1, SrcX));
   }
 
   break;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 9e77d20813..5d34ed089f 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6092,35 +6092,36 @@ static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
   unsigned ValSize = VT.getSizeInBits();
   unsigned IntrinsicID = N->getConstantOperandVal(0);
   bool IsPermLane16 = IntrinsicID == Intrinsic::amdgcn_permlane16 ||
-IntrinsicID == Intrinsic::amdgcn_permlanex16;
+  IntrinsicID == Intrinsic::amdgcn_permlanex16;
   bool IsPermLane64 = IntrinsicID == Intrinsic::amdgcn_permlane64;
   SDValue Src0 = N->getOperand(1);
   SDLoc SL(N);
   MVT IntVT =
```

[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)

2024-05-20 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH edited 
https://github.com/llvm/llvm-project/pull/92725


[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)

2024-05-20 Thread Matt Arsenault via cfe-commits

https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/92725


[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)

2024-05-20 Thread Matt Arsenault via cfe-commits

https://github.com/arsenm commented:

On this and the previous patch, can you add a section to AMDGPUUsage describing these intrinsics and what types they support?

https://github.com/llvm/llvm-project/pull/92725
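
For readers of the archive, a hedged illustration of what such a documentation section would need to cover: the permlane builtins today take 32-bit integer operands, and accepting other scalar and vector types is exactly what this patch proposes. The kernel below is a sketch only; the `float` overload is an assumption pending the requested documentation, and the HIP-style kernel wrapper is illustrative, not part of the patch.

```cpp
#include <hip/hip_runtime.h>

// Hedged sketch: cross-lane exchange between the two 16-lane halves of a row
// using __builtin_amdgcn_permlanex16 on a float operand (the generic-type
// behavior under review; the builtin is currently documented for i32 only).
__global__ void swap_row_halves(float *out, const float *in) {
  int lane = threadIdx.x;                       // assumes one wavefront per block
  float v = in[lane];
  // Arguments: old value, source value, lane-select lo, lane-select hi,
  // fetch-inactive, bound-control.
  float swapped = __builtin_amdgcn_permlanex16(v, v, 0u, 0u, false, false);
  out[lane] = swapped;
}
```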


[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)

2024-05-20 Thread Matt Arsenault via cfe-commits


@@ -18479,6 +18479,25 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
 return Builder.CreateCall(F, Args);
   }
+  case AMDGPU::BI__builtin_amdgcn_permlane16:
+  case AMDGPU::BI__builtin_amdgcn_permlanex16: {
+Intrinsic::ID IID;
+IID = BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16

arsenm wrote:

Combine the declaration and definition; this can also be sunk down to the point of use.

https://github.com/llvm/llvm-project/pull/92725
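
In the codegen snippet quoted above, the suggestion amounts to something like the following minimal sketch (surrounding CGBuiltin.cpp context is assumed):

```cpp
// Declaration and initialization folded together, placed next to the use.
Intrinsic::ID IID = BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
                        ? Intrinsic::amdgcn_permlane16
                        : Intrinsic::amdgcn_permlanex16;
```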


[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)

2024-05-20 Thread Matt Arsenault via cfe-commits


@@ -18479,6 +18479,25 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
 return Builder.CreateCall(F, Args);
   }
+  case AMDGPU::BI__builtin_amdgcn_permlane16:
+  case AMDGPU::BI__builtin_amdgcn_permlanex16: {
+Intrinsic::ID IID;
+IID = BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
+  ? Intrinsic::amdgcn_permlane16
+  : Intrinsic::amdgcn_permlanex16;
+
+llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
+llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
+llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));

arsenm wrote:

I assume EmitScalarExpr handles the immargs correctly? 

https://github.com/llvm/llvm-project/pull/92725


[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)

2024-05-20 Thread Matt Arsenault via cfe-commits


@@ -5433,7 +5450,16 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper 
&Helper,
 ? Src0
 : B.buildBitcast(LLT::scalar(Size), 
Src0).getReg(0);
 Src0 = B.buildAnyExt(S32, Src0Cast).getReg(0);
-if (Src2.isValid()) {
+
+if (IsPermLane16) {
+  Register Src1Cast =
+  MRI.getType(Src1).isScalar()
+  ? Src1
+  : B.buildBitcast(LLT::scalar(Size), Src2).getReg(0);

arsenm wrote:

As in the other patch, this shouldn't need any bitcasts.

https://github.com/llvm/llvm-project/pull/92725


[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)

2024-05-26 Thread Vikram Hegde via cfe-commits


@@ -18479,6 +18479,25 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
 return Builder.CreateCall(F, Args);
   }
+  case AMDGPU::BI__builtin_amdgcn_permlane16:
+  case AMDGPU::BI__builtin_amdgcn_permlanex16: {
+Intrinsic::ID IID;
+IID = BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
+  ? Intrinsic::amdgcn_permlane16
+  : Intrinsic::amdgcn_permlanex16;
+
+llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
+llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
+llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));

vikramRH wrote:

yes

https://github.com/llvm/llvm-project/pull/92725


[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)

2024-05-26 Thread Vikram Hegde via cfe-commits


@@ -5433,7 +5450,16 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper 
&Helper,
 ? Src0
 : B.buildBitcast(LLT::scalar(Size), 
Src0).getReg(0);
 Src0 = B.buildAnyExt(S32, Src0Cast).getReg(0);
-if (Src2.isValid()) {
+
+if (IsPermLane16) {
+  Register Src1Cast =
+  MRI.getType(Src1).isScalar()
+  ? Src1
+  : B.buildBitcast(LLT::scalar(Size), Src2).getReg(0);

vikramRH wrote:

Yes, I will take over the changes from https://github.com/llvm/llvm-project/pull/89217 once they are finalized.

https://github.com/llvm/llvm-project/pull/92725


[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)

2024-05-29 Thread Vikram Hegde via cfe-commits

https://github.com/vikramRH edited 
https://github.com/llvm/llvm-project/pull/92725


[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)

2024-05-29 Thread Vikram Hegde via cfe-commits

vikramRH wrote:

1. Added/updated tests for permlanex16 and permlane64 (see the sketch below).
2. This needs https://github.com/llvm/llvm-project/pull/89217 to land first so that only the incremental changes need to be reviewed.

https://github.com/llvm/llvm-project/pull/92725
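
A hedged sketch of the kind of coverage described in item 1: permlane64 swaps data between the two 32-lane halves of a wave64 (gfx11+). The `float` operand shown here relies on the generic-type lowering this PR adds and is an assumption, and the HIP kernel wrapper is illustrative only.

```cpp
#include <hip/hip_runtime.h>

// Sum each lane's value with the value held by the corresponding lane in the
// opposite half of a wave64, using a float operand.
__global__ void cross_half_sum(float *out, const float *in) {
  int lane = threadIdx.x;                        // assumes one wave64 per block
  float v = in[lane];
  float other = __builtin_amdgcn_permlane64(v);  // value from the opposite half
  out[lane] = v + other;
}
```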


[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)

2024-05-29 Thread Matt Arsenault via cfe-commits


@@ -18479,6 +18479,28 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
 return Builder.CreateCall(F, Args);
   }
+  case AMDGPU::BI__builtin_amdgcn_permlane16:
+  case AMDGPU::BI__builtin_amdgcn_permlanex16: {
+llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));

arsenm wrote:

If there really isn't a helper to just EmitScalarExpr for the first N arguments, there should be one, and it should be used here.

https://github.com/llvm/llvm-project/pull/92725
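
A minimal sketch of the kind of helper being asked for; the name and signature are hypothetical, and the helper the patch eventually adds may differ. It relies only on existing CodeGenFunction and CallExpr APIs and would live in clang/lib/CodeGen/CGBuiltin.cpp, which already pulls in the needed headers.

```cpp
// Hypothetical convenience wrapper: emit the first NumArgs arguments of a
// call expression as scalar values.
static void emitScalarArgs(clang::CodeGen::CodeGenFunction &CGF,
                           const clang::CallExpr *E,
                           llvm::SmallVectorImpl<llvm::Value *> &Args,
                           unsigned NumArgs) {
  for (unsigned I = 0; I != NumArgs; ++I)
    Args.push_back(CGF.EmitScalarExpr(E->getArg(I)));
}
```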


[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)

2024-06-17 Thread Vikram Hegde via cfe-commits


@@ -18479,6 +18479,28 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
 CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
 return Builder.CreateCall(F, Args);
   }
+  case AMDGPU::BI__builtin_amdgcn_permlane16:
+  case AMDGPU::BI__builtin_amdgcn_permlanex16: {
+llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));

vikramRH wrote:

added a new helper

https://github.com/llvm/llvm-project/pull/92725


[clang] [llvm] [AMDGPU][WIP] Extend permlane16, permlanex16 and permlane64 intrinsic lowering for generic types (PR #92725)

2024-06-17 Thread Vikram Hegde via cfe-commits

vikramRH wrote:

Updated this PR to be in sync with #89217. However, the plan is still to land this only after the changes in #89217 are accepted.

https://github.com/llvm/llvm-project/pull/92725