@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown
-target-cpu gfx940 -S -verify -o - %s
+// REQUIRES: amdgpu-registered-target
+
+typedef unsigned int u32;
+
+void test_global_load_lds_unsupported_size(global u32* src, local u32 *dst,
u32
@@ -2537,6 +2537,47 @@ static RValue
EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
return RValue::get(CGF->Builder.CreateCall(UBF, Args));
}
+static void buildInstrinsicCallArgs(CodeGenFunction , const CallExpr *E,
arsenm wrote:
Shouldn't need any
@@ -19040,6 +19040,48 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned
BuiltinID,
CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
return Builder.CreateCall(F, {Arg});
}
+ case AMDGPU::BI__builtin_amdgcn_global_load_lds: {
+
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown
-target-cpu gfx940 -S -verify -o - %s
+// REQUIRES: amdgpu-registered-target
arsenm wrote:
Test belongs in SemaOpenCL
https://github.com/llvm/llvm-project/pull/93064
@@ -19040,6 +19040,48 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned
BuiltinID,
CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
return Builder.CreateCall(F, {Arg});
}
+ case AMDGPU::BI__builtin_amdgcn_global_load_lds: {
+
@@ -19040,6 +19040,48 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned
BuiltinID,
CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
return Builder.CreateCall(F, {Arg});
}
+ case AMDGPU::BI__builtin_amdgcn_global_load_lds: {
+
@@ -19040,6 +19040,48 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned
BuiltinID,
CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
return Builder.CreateCall(F, {Arg});
}
+ case AMDGPU::BI__builtin_amdgcn_global_load_lds: {
+
@@ -678,6 +680,49 @@ class SIMemoryLegalizer final : public MachineFunctionPass
{
bool runOnMachineFunction(MachineFunction ) override;
};
+static const StringMap ASNames = {{
+{"global", SIAtomicAddrSpace::GLOBAL},
+{"local", SIAtomicAddrSpace::LDS},
+}};
+
+void
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/78572
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/78572
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
arsenm wrote:
> Then I guess the MMRA should just have "global" and "local" for now, we can
> always add more later if needed. What do you think?
Yes, we don't have specific image counters. They are just vcmnt
https://github.com/llvm/llvm-project/pull/78572
@@ -240,6 +240,7 @@ TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2bf16,
"V2sV2s*0V2s", "t", "at
TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2bf16, "V2sV2s*1V2s", "t",
"atomic-global-pk-add-bf16-inst")
TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2bf16,
arsenm wrote:
> I thought image memory = private. It's unclear to me, what AS does OpenCL
> IMAGE memory map to in our backend? (But otherwise, yes, MMRA should just
> have the backend names, the mapping of the OpenCL IMAGE to a backend AS
> should be in the device-lib)
Images are global
arsenm wrote:
> @arsenm Should we use `image` or `private`? We could allow both in the
> frontend, and only use `private` as the canonical MMRA.
I don't understand why image would imply private. I would just keep at as
private throughout
https://github.com/llvm/llvm-project/pull/78572
@@ -5433,7 +5450,16 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper
,
? Src0
: B.buildBitcast(LLT::scalar(Size),
Src0).getReg(0);
Src0 = B.buildAnyExt(S32, Src0Cast).getReg(0);
-if (Src2.isValid()) {
@@ -18479,6 +18479,25 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned
BuiltinID,
CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
return Builder.CreateCall(F, Args);
}
+ case AMDGPU::BI__builtin_amdgcn_permlane16:
+ case
@@ -18479,6 +18479,25 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned
BuiltinID,
CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
return Builder.CreateCall(F, Args);
}
+ case AMDGPU::BI__builtin_amdgcn_permlane16:
+ case
https://github.com/arsenm commented:
On this and the previous, can you add a section to AMDGPUUsage for the
intrinsics and what types they support
https://github.com/llvm/llvm-project/pull/92725
___
cfe-commits mailing list
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/92725
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -5387,6 +5387,192 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -5387,6 +5387,192 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
https://github.com/arsenm requested changes to this pull request.
There should be no need to introduce same-sized value casts, whether bitcast or
ptrtoint in either legalizer
https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing
@@ -6086,6 +6086,62 @@ static SDValue lowerBALLOTIntrinsic(const
SITargetLowering , SDNode *N,
DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
}
+static SDValue lowerLaneOp(const SITargetLowering , SDNode *N,
+ SelectionDAG ) {
@@ -5387,6 +5387,192 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -5387,6 +5387,192 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -0,0 +1,25 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 4
+//RUN: %clang_cc1 %s -emit-llvm -O1 -o - | FileCheck %s
arsenm wrote:
codegen tests need an explicit target
@@ -6086,6 +6086,62 @@ static SDValue lowerBALLOTIntrinsic(const
SITargetLowering , SDNode *N,
DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
}
+static SDValue lowerLaneOp(const SITargetLowering , SDNode *N,
+ SelectionDAG ) {
@@ -243,11 +243,16 @@ def VOP_READFIRSTLANE : VOPProfile <[i32, i32, untyped,
untyped]> {
// FIXME: Specify SchedRW for READFIRSTLANE_B32
// TODO: There is VOP3 encoding also
def V_READFIRSTLANE_B32 : VOP1_Pseudo <"v_readfirstlane_b32",
VOP_READFIRSTLANE,
-
@@ -243,11 +243,16 @@ def VOP_READFIRSTLANE : VOPProfile <[i32, i32, untyped,
untyped]> {
// FIXME: Specify SchedRW for READFIRSTLANE_B32
// TODO: There is VOP3 encoding also
def V_READFIRSTLANE_B32 : VOP1_Pseudo <"v_readfirstlane_b32",
VOP_READFIRSTLANE,
-
@@ -243,11 +243,16 @@ def VOP_READFIRSTLANE : VOPProfile <[i32, i32, untyped,
untyped]> {
// FIXME: Specify SchedRW for READFIRSTLANE_B32
// TODO: There is VOP3 encoding also
def V_READFIRSTLANE_B32 : VOP1_Pseudo <"v_readfirstlane_b32",
VOP_READFIRSTLANE,
-
@@ -243,11 +243,16 @@ def VOP_READFIRSTLANE : VOPProfile <[i32, i32, untyped,
untyped]> {
// FIXME: Specify SchedRW for READFIRSTLANE_B32
// TODO: There is VOP3 encoding also
def V_READFIRSTLANE_B32 : VOP1_Pseudo <"v_readfirstlane_b32",
VOP_READFIRSTLANE,
-
@@ -780,14 +780,22 @@ defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32",
VOP_I32_I32_I32_ARITH, null_frag,
// These are special and do not read the exec mask.
let isConvergent = 1, Uses = [] in {
-def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE,
- [(set
@@ -243,11 +243,16 @@ def VOP_READFIRSTLANE : VOPProfile <[i32, i32, untyped,
untyped]> {
// FIXME: Specify SchedRW for READFIRSTLANE_B32
// TODO: There is VOP3 encoding also
def V_READFIRSTLANE_B32 : VOP1_Pseudo <"v_readfirstlane_b32",
VOP_READFIRSTLANE,
-
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/92294
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
arsenm wrote:
> It's still used:
>
> ```
> /work/kparzysz/git/llvm.org/mlir/lib/Target/LLVM/ROCDL/Target.cpp: In member
> function ‘std::optional >
> mlir::ROCDL::SerializeGPUModuleBase::assembleIsa(llvm::StringRef)’:
> /work/kparzysz/git/llvm.org/mlir/lib/Target/LLVM/ROCDL/Target.cpp:302:15:
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/92232
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?=
Message-ID:
In-Reply-To:
@@ -1586,6 +1586,12 @@ class CodeGenModule : public CodeGenTypeCache {
void AddGlobalDtor(llvm::Function *Dtor, int Priority = 65535,
@@ -6086,6 +6086,68 @@ static SDValue lowerBALLOTIntrinsic(const
SITargetLowering , SDNode *N,
DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
}
+static SDValue lowerLaneOp(const SITargetLowering , SDNode *N,
+ SelectionDAG ) {
@@ -3400,7 +3400,7 @@ def : GCNPat<
// FIXME: Should also do this for readlane, but tablegen crashes on
// the ignored src1.
def : GCNPat<
- (int_amdgcn_readfirstlane (i32 imm:$src)),
+ (i32 (AMDGPUreadfirstlane (i32 imm:$src))),
arsenm wrote:
We might need
@@ -5387,6 +5387,212 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -5387,6 +5387,212 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -5386,6 +5386,153 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -58,7 +58,7 @@ CHECK-CNT3-NOT: {{^}}this is duplicate
CHECK-CNT4-COUNT-5: this is duplicate
CHECK-CNT4-EMPTY:
-Many-label:
+Many-LABEL:
arsenm wrote:
I would be careful about touching FileCheck tests. The point might be the wrong
label
@@ -4408,6 +4409,42 @@ Target-Specific Extensions
Clang supports some language features conditionally on some targets.
+AMDGPU Language Extensions
+--
+
+__builtin_amdgcn_fence
+^^
+
+``__builtin_amdgcn_fence`` emits a fence.
+
+*
https://github.com/arsenm commented:
amdgpu changes lgtm
https://github.com/llvm/llvm-project/pull/91854
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
arsenm wrote:
> > (You can even place `.quad sym[0].hash; .long sym[0].size` in a section
> > `SHF_LINK_ORDER` linking to the global variable for linker garbage
> > collection.)
> > The runtime can build a map correlating hashes to sizes, which can be used
> > to answer variable size queries.
@@ -2176,26 +2176,23 @@ def int_amdgcn_wave_reduce_umin : AMDGPUWaveReduce;
def int_amdgcn_wave_reduce_umax : AMDGPUWaveReduce;
def int_amdgcn_readfirstlane :
- ClangBuiltin<"__builtin_amdgcn_readfirstlane">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
+
@@ -5386,6 +5386,153 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -5386,6 +5386,153 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -5386,6 +5386,153 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -5386,6 +5386,153 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -0,0 +1,111 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fsyntax-only -verify %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+kernel void test () {
+
+ int sgpr = 0, vgpr = 0, imm = 0;
+
+ // sgpr constraints
+ __asm__
@@ -0,0 +1,111 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fsyntax-only -verify %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+kernel void test () {
+
+ int sgpr = 0, vgpr = 0, imm = 0;
+
+ // sgpr constraints
+ __asm__
@@ -0,0 +1,111 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fsyntax-only -verify %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+kernel void test () {
+
+ int sgpr = 0, vgpr = 0, imm = 0;
+
+ // sgpr constraints
+ __asm__
@@ -2658,21 +2676,102 @@
IGroupLPDAGMutation::invertSchedBarrierMask(SchedGroupMask Mask) const {
return InvertedMask;
}
+void IGroupLPDAGMutation::addSchedGroupBarrierRules() {
+
+ /// Whether or not the instruction has no true data predecessors
+ /// with opcode \p
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/85304
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -1284,7 +1284,29 @@ The AMDGPU backend implements the following LLVM IR
intrinsics.
| ``// 5 MFMA``
|
``__builtin_amdgcn_sched_group_barrier(8, 5, 0)``
-
https://github.com/arsenm commented:
I don't understand how anyone is supposed to use this. This is exposing
extremely specific, random low level details of the scheduling. Users claim
they want scheduling controls, but what they actually want is the scheduler to
just do the right thing. We
@@ -247,7 +247,7 @@ Address CodeGen::emitMergePHI(CodeGenFunction , Address
Addr1,
bool CodeGen::isEmptyField(ASTContext , const FieldDecl *FD,
bool AllowArrays, bool AsIfNoUniqueAddr) {
- if (FD->isUnnamedBitField())
+ if
@@ -157,7 +157,7 @@ llvm::Value
*CodeGen::emitRoundPointerUpToAlignment(CodeGenFunction ,
llvm::Value *RoundUp = CGF.Builder.CreateConstInBoundsGEP1_32(
CGF.Builder.getInt8Ty(), Ptr, Align.getQuantity() - 1);
return CGF.Builder.CreateIntrinsic(
-
@@ -24,6 +24,7 @@ MODULE_PASS("amdgpu-lower-ctor-dtor",
AMDGPUCtorDtorLoweringPass())
MODULE_PASS("amdgpu-lower-module-lds", AMDGPULowerModuleLDSPass(*this))
MODULE_PASS("amdgpu-printf-runtime-binding", AMDGPUPrintfRuntimeBindingPass())
MODULE_PASS("amdgpu-unify-metadata",
@@ -0,0 +1,55 @@
+/*=== __clang_hip_device_macro_guards.h - guards for HIP device macros -===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier:
@@ -4408,6 +4409,42 @@ Target-Specific Extensions
Clang supports some language features conditionally on some targets.
+AMDGPU Language Extensions
+--
+
+__builtin_amdgcn_fence
+^^
+
+``__builtin_amdgcn_fence`` emits a fence.
+
+*
@@ -1,22 +1,113 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 4
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \
-// RUN: -triple=amdgcn-amd-amdhsa | opt -S | FileCheck %s
+// RUN:
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/90925
>From 4760ebce0ff7725f4bb75f5107f551d867e4db6d Mon Sep 17 00:00:00 2001
From: Ellis Hoag
Date: Thu, 2 May 2024 17:47:38 -0700
Subject: [PATCH 1/4] [modules] Accept equivalent module caches from different
symlink
Nathan =?utf-8?q?Gau=C3=ABr?= ,
Nathan =?utf-8?q?Gau=C3=ABr?= ,
Nathan =?utf-8?q?Gau=C3=ABr?= ,
Nathan =?utf-8?q?Gau=C3=ABr?=
Message-ID:
In-Reply-To:
@@ -1586,6 +1586,12 @@ class CodeGenModule : public CodeGenTypeCache {
void AddGlobalDtor(llvm::Function *Dtor, int
Nathan =?utf-8?q?Gau=C3=ABr?= ,
Nathan =?utf-8?q?Gau=C3=ABr?= ,
Nathan =?utf-8?q?Gau=C3=ABr?= ,
Nathan =?utf-8?q?Gau=C3=ABr?=
Message-ID:
In-Reply-To:
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/88918
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?=
Message-ID:
In-Reply-To:
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/88918
___
cfe-commits mailing list
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?= ,
Nathan =?utf-8?q?Gauër?=
Message-ID:
In-Reply-To:
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/88918
___
cfe-commits mailing list
@@ -504,3 +508,16 @@ def AMDGPUdiv_fmas : PatFrags<(ops node:$src0, node:$src1,
node:$src2, node:$vcc
def AMDGPUperm : PatFrags<(ops node:$src0, node:$src1, node:$src2),
[(int_amdgcn_perm node:$src0, node:$src1, node:$src2),
(AMDGPUperm_impl node:$src0, node:$src1,
@@ -5386,6 +5386,130 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -5386,6 +5386,130 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -5982,6 +5982,68 @@ static SDValue lowerBALLOTIntrinsic(const
SITargetLowering , SDNode *N,
DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
}
+static SDValue lowerLaneOp(const SITargetLowering , SDNode *N,
+ SelectionDAG ) {
@@ -5386,6 +5386,130 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -5386,6 +5386,130 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -disable-O0-optnone
-emit-llvm \
+// RUN: %s -o - | opt -S -passes=mem2reg | FileCheck %s
+
+// CHECK-LABEL: define dso_local half @test_convert_from_bf16_to_fp16(
+// CHECK-SAME: bfloat noundef [[A:%.*]])
arsenm wrote:
> ping Ping Do you have another review comment?
This has now confused me. You should roll back to the case where you only
changed the scalar behavior. Any vector behavior change should be a separate
PR, if that is even correct. I would still like to know what the gcc behavior
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/90994
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -5386,6 +5386,94 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -504,3 +508,15 @@ def AMDGPUdiv_fmas : PatFrags<(ops node:$src0, node:$src1,
node:$src2, node:$vcc
def AMDGPUperm : PatFrags<(ops node:$src0, node:$src1, node:$src2),
[(int_amdgcn_perm node:$src0, node:$src1, node:$src2),
(AMDGPUperm_impl node:$src0, node:$src1,
@@ -5386,6 +5386,94 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -6091,6 +5982,70 @@ static SDValue lowerBALLOTIntrinsic(const
SITargetLowering , SDNode *N,
DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
}
+static SDValue lowerLaneOp(const SITargetLowering , SDNode *N,
+ SelectionDAG ) {
@@ -5386,6 +5386,94 @@ bool
AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper ,
return true;
}
+bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper ,
+ MachineInstr ,
+
@@ -6091,6 +5982,70 @@ static SDValue lowerBALLOTIntrinsic(const
SITargetLowering , SDNode *N,
DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
}
+static SDValue lowerLaneOp(const SITargetLowering , SDNode *N,
+ SelectionDAG ) {
arsenm wrote:
> I'm now wondering if adding a new builtin is needed at all, or if it should
> just be part of the original builtin? It's an additive change.
Maybe?
>
> Should we also rename the MMRA to `amdgpu-fence-as` (remove OpenCL from the
> name) ?
>
I definitely do not want to
@@ -316,3 +316,82 @@ define <2 x i32> @test_trunc_both_reversed_vector(<2 x
i64> %a) {
%res = trunc nsw nuw <2 x i64> %a to <2 x i32>
ret <2 x i32> %res
}
+
+define ptr @gep_nuw(ptr %p, i64 %idx) {
+; CHECK: %gep = getelementptr nuw i8, ptr %p, i64 %idx
+ %gep =
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/89477
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -1462,6 +1460,14 @@ floating point semantic models: precise (the default),
strict, and fast.
"allow_approximate_fns", "off", "off", "on"
"allow_reassociation", "off", "off", "on"
+The ``-fp-model`` option does not modify the "fdenormal-fp-math" or
@@ -18319,6 +18320,26 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned
BuiltinID,
return nullptr;
}
+void CodeGenFunction::AddAMDGCNAddressSpaceMMRA(llvm::Instruction *Inst,
+llvm::Value *ASMask) {
+ constexpr const
@@ -4403,6 +4404,60 @@ Target-Specific Extensions
Clang supports some language features conditionally on some targets.
+AMDGPU Language Extensions
+--
+
+__builtin_amdgcn_fence
+^^
+
+``__builtin_amdgcn_fence`` emits a fence for
@@ -1906,7 +1909,15 @@ Value
*ScalarExprEmitter::VisitConvertVectorExpr(ConvertVectorExpr *E) {
} else {
assert(SrcEltTy->isFloatingPointTy() && DstEltTy->isFloatingPointTy() &&
"Unknown real conversion");
-if (DstEltTy->getTypeID() <
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -disable-O0-optnone
-emit-llvm \
+// RUN: %s -o - | opt -S -passes=mem2reg | FileCheck %s
+
+// CHECK-LABEL: define dso_local half @test_convert_from_bf16_to_fp16(
+// CHECK-SAME: bfloat noundef [[A:%.*]])
@@ -1,3 +1,6 @@
-__kernel void foo(int *i) {
+// RUN: %clang -emit-llvm -S -o - %s | FileCheck %s
arsenm wrote:
It is essential that clang should know about libclc. libclc exists purely as an
extension of the compiler. From the user perspective the opencl
@@ -0,0 +1,165 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -disable-O0-optnone
-emit-llvm \
+// RUN: %s -o - | opt -S -passes=mem2reg | FileCheck %s
+
+// CHECK-LABEL: define dso_local half @test_convert_from_bf16_to_fp16(
+// CHECK-SAME: bfloat noundef [[A:%.*]])
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/80475
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
arsenm wrote:
> Some of the GPU targets, IIRC, want daz/ftz by default. Not all targets have
> DAZ/FTZ bits that can be set; I think RISC-V is in this category, although to
> be honest, trying to track down all the ISA extensions to make sure is a bit
> beyond my ken.
>
OpenCL allows you to
@@ -842,25 +842,6 @@ void Linux::addProfileRTLibs(const llvm::opt::ArgList
,
ToolChain::addProfileRTLibs(Args, CmdArgs);
}
-llvm::DenormalMode
-Linux::getDefaultDenormalModeForType(const llvm::opt::ArgList ,
- const JobAction ,
-
@@ -18763,19 +18763,28 @@ Value
*CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
// scheduling builtins
case AMDGPU::BI__builtin_amdgcn_sched_group_barrier: {
-return E->getNumArgs() == 3
- ? Builder.CreateCall(
-
@@ -18763,19 +18763,28 @@ Value
*CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
// scheduling builtins
case AMDGPU::BI__builtin_amdgcn_sched_group_barrier: {
-return E->getNumArgs() == 3
- ? Builder.CreateCall(
-
301 - 400 of 1270 matches
Mail list logo