@@ -1108,3 +1108,8 @@ void
GCNUserSGPRUsageInfo::allocKernargPreloadSGPRs(unsigned NumSGPRs) {
unsigned GCNUserSGPRUsageInfo::getNumFreeUserSGPRs() {
return AMDGPU::getMaxNumUserSGPRs(ST) - NumUsedUserSGPRs;
}
+
+SmallVector
arsenm wrote:
std::array<3>?
@@ -356,6 +356,19 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
if (NumVGPR != 0)
F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
}
+
+ if (const auto *Attr = FD->getAttr()) {
+uint32_t X = Attr->getNumWorkGroupsX();
+uint32_t Y =
https://github.com/arsenm commented:
Needs documentation in AMDGPUUsage. Should also clarify behavior of 0
https://github.com/llvm/llvm-project/pull/79035
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/79035
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/81331
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -312,6 +312,12 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0));
break;
}
+ case Intrinsic::readfixedtimer: {
+errs() << "WARNING: this target does not support the
@@ -3759,7 +3759,6 @@ def CALL_PROTOTYPE :
include "NVPTXIntrinsics.td"
-
arsenm wrote:
Random whitespace change
https://github.com/llvm/llvm-project/pull/81331
___
cfe-commits mailing list
https://github.com/arsenm commented:
Missing the clang builtin test
https://github.com/llvm/llvm-project/pull/81331
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/81331
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/76955
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/81083
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -5908,7 +5908,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl
GD, unsigned BuiltinID,
}
}
-assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
+assert(ArgValue->getType()->canLosslesslyBitCastTo(PTy) &&
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/81083
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -167,6 +167,10 @@ def FeatureCuMode : SubtargetFeature<"cumode",
"Enable CU wavefront execution mode"
>;
+def FeaturePreciseMemory
arsenm wrote:
The subtarget feature prefix should be removed. The subtarget feature name is
not the user-facing component
@@ -0,0 +1,199 @@
+; Testing the -amdgpu-precise-memory-op option
+; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=+amdgpu-precise-memory-op
-verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX9
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -mattr=+amdgpu-precise-memory-op
https://github.com/arsenm commented:
I think this needs codegen tests for the gfx900 vs. gfx906 mad_mix/fma_mix
issue
https://github.com/llvm/llvm-project/pull/76955
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
@@ -2819,11 +2819,11 @@ def int_amdgcn_fdot2_f16_f16 :
def int_amdgcn_fdot2_bf16_bf16 :
ClangBuiltin<"__builtin_amdgcn_fdot2_bf16_bf16">,
DefaultAttrsIntrinsic<
-[llvm_i16_ty], // %r
+[llvm_bfloat_ty], // %r
arsenm wrote:
Changing the clang
@@ -2835,8 +2835,8 @@ def int_amdgcn_fdot2_f32_bf16 :
DefaultAttrsIntrinsic<
[llvm_float_ty], // %r
[
- llvm_v2i16_ty, // %a
- llvm_v2i16_ty, // %b
+ llvm_v2bf16_ty, // %a
+ llvm_v2bf16_ty, // %b
arsenm wrote:
For potential
@@ -1562,8 +1562,9 @@ bool IRTranslator::translateBitCast(const User ,
bool IRTranslator::translateCast(unsigned Opcode, const User ,
MachineIRBuilder ) {
- if (U.getType()->getScalarType()->isBFloatTy() ||
-
@@ -0,0 +1,8 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck %s
+
+v_dot2_bf16_bf16 v5, v1, v2, 100.0
arsenm wrote:
does this help with #79369 at all?
@@ -1562,8 +1562,9 @@ bool IRTranslator::translateBitCast(const User ,
bool IRTranslator::translateCast(unsigned Opcode, const User ,
MachineIRBuilder ) {
- if (U.getType()->getScalarType()->isBFloatTy() ||
-
arsenm wrote:
Next piece in #81108
https://github.com/llvm/llvm-project/pull/74056
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm created
https://github.com/llvm/llvm-project/pull/81108
This completes the unrevert of ef388334ee5a3584255b9ef5b3fefdb244fa3fd7.
>From 7b5b50597e13c647ec70beab35dcc9b643bff42f Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Thu, 8 Feb 2024 14:15:33 +0530
Subject:
https://github.com/arsenm closed https://github.com/llvm/llvm-project/pull/74056
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/74056
>From 9be777d5b39852cf3c0b2538fd5f712922672caa Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 1 Dec 2023 18:00:13 +0900
Subject: [PATCH 1/4] Reapply "InstCombine: Introduce
SimplifyDemandedUseFPClass""
@@ -0,0 +1,273 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-cpu x86-64-v4
-std=c23 -O1 -ffreestanding -emit-llvm -o - %s | FileCheck %s
+
+//
@@ -0,0 +1,117 @@
+// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -Wno-varargs -O1
-disable-llvm-passes -emit-llvm -o - %s | opt --passes=instcombine | opt
-passes="expand-variadics,default" -S | FileCheck %s
--check-prefixes=CHECK,X86Linux
arsenm wrote:
@@ -0,0 +1,589 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
UTC_ARGS: -p --function-signature
+; RUN: opt -S --passes=expand-variadics < %s | FileCheck %s
+target datalayout =
@@ -0,0 +1,698 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier:
@@ -0,0 +1,698 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier:
@@ -0,0 +1,698 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier:
@@ -0,0 +1,589 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
UTC_ARGS: -p --function-signature
+; RUN: opt -S --passes=expand-variadics < %s | FileCheck %s
+target datalayout =
@@ -0,0 +1,698 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier:
@@ -0,0 +1,589 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
UTC_ARGS: -p --function-signature
+; RUN: opt -S --passes=expand-variadics < %s | FileCheck %s
+target datalayout =
@@ -0,0 +1,698 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier:
@@ -0,0 +1,698 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier:
@@ -0,0 +1,698 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier:
@@ -0,0 +1,698 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier:
@@ -0,0 +1,698 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier:
@@ -1877,3 +1877,139 @@ Value
*InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
return MadeChange ? I : nullptr;
}
+
+/// For floating-point classes that resolve to a single bit pattern, return
that
+/// value.
+static Constant *getFPClassConstant(Type *Ty,
@@ -1877,3 +1877,139 @@ Value
*InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
return MadeChange ? I : nullptr;
}
+
+/// For floating-point classes that resolve to a single bit pattern, return
that
+/// value.
+static Constant *getFPClassConstant(Type *Ty,
@@ -1877,3 +1877,139 @@ Value
*InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
return MadeChange ? I : nullptr;
}
+
+/// For floating-point classes that resolve to a single bit pattern, return
that
+/// value.
+static Constant *getFPClassConstant(Type *Ty,
@@ -1877,3 +1877,139 @@ Value
*InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
return MadeChange ? I : nullptr;
}
+
+/// For floating-point classes that resolve to a single bit pattern, return
that
+/// value.
+static Constant *getFPClassConstant(Type *Ty,
@@ -1877,3 +1877,139 @@ Value
*InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
return MadeChange ? I : nullptr;
}
+
+/// For floating-point classes that resolve to a single bit pattern, return
that
+/// value.
+static Constant *getFPClassConstant(Type *Ty,
arsenm wrote:
> I don't know why it fails:
>
> ```
> error: patch failed: llvm/lib/Transforms/InstCombine/InstCombineInternal.h:551
> error: llvm/lib/Transforms/InstCombine/InstCombineInternal.h: patch does not
> apply
> error: patch failed:
>
arsenm wrote:
> @arsenm Can you rebase this patch first?
It was already fresh, I just re-merged again with no conflicts
https://github.com/llvm/llvm-project/pull/74056
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/74056
>From 9be777d5b39852cf3c0b2538fd5f712922672caa Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 1 Dec 2023 18:00:13 +0900
Subject: [PATCH 1/2] Reapply "InstCombine: Introduce
SimplifyDemandedUseFPClass""
@@ -285,6 +289,20 @@ void
NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
return false;
}
+
+llvm::Constant *
+NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule ,
+
@@ -285,6 +289,20 @@ void
NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
return false;
}
+
+llvm::Constant *
+NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule ,
+
arsenm wrote:
> Do you only set the register for kernel entries?
Yes, it's the pre-initialized state. Non kernels can't be arbitrarily invoked
from the host
> Is the attribute ignored for other functions?
No, it's an informative attribute about what the mode is. The compiler isn't
trying
arsenm wrote:
> > So, alternatively...we could just go with the simplest solution, and use
> > "ieee" as the default even under -ffast-math.
>
+1. There hasn't been a performance reason to use FTZ/DAZ since ~2011. Maybe
there's still a power benefit? But in that case you could still
arsenm wrote:
> @arsenm Are you suggesting that these should instead be a range of
> minimum/maximum number of workitems globally?
That's how all of the other attributes we already have do this.
amdgpu-waves-per-eu is a single min, max pair. Same with
amdgpu-flat-work-group-size
Although
https://github.com/arsenm requested changes to this pull request.
Is this redundant with #68515? Do we just need to add OpenCL test coverage?
https://github.com/llvm/llvm-project/pull/72554
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
arsenm wrote:
@dtcxzyw are you planning on a codegen patch to improve the backend handling?
https://github.com/llvm/llvm-project/pull/76338
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
arsenm wrote:
ping, I want to get this in and move to remove the flag
https://github.com/llvm/llvm-project/pull/74056
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm approved this pull request.
amdgpu parts lgtm (which could be split to a separate change from the ptx
change)
https://github.com/llvm/llvm-project/pull/78759
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
@@ -104,3 +106,14 @@ void fun() {
(void) b;
(void) var_host_only;
}
+
+extern __global__ void external_func();
+extern void* const external_dep[] = {
arsenm wrote:
Sounds broken that the behavior would differ between array and non-array?
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/67104
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm requested changes to this pull request.
One attribute
https://github.com/llvm/llvm-project/pull/79035
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm commented:
One attribute, with a range, would be better than two attributes. This is how
it is handled in the similar cases.
I also think this should be in terms of work items, not workgroups
https://github.com/llvm/llvm-project/pull/79035
@@ -520,6 +520,104 @@ Every processor supports every OS ABI (see
:ref:`amdgpu-os`) with the following
=== === = =
=== === ==
+Generic processors also exist.
@@ -156,6 +156,12 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() {
const GCNSubtarget = MF->getSubtarget();
const Function = MF->getFunction();
+ // TODO: We're checking this late, would be nice to check it earlier.
+ if (STM.requiresCodeObjectV6() &&
@@ -139,10 +139,10 @@ bool
AMDGPURemoveIncompatibleFunctions::checkFunction(Function ) {
const GCNSubtarget *ST =
static_cast(TM->getSubtargetImpl(F));
- // Check the GPU isn't generic. Generic is used for testing only
- // and we don't want this pass to interfere
@@ -279,13 +279,25 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions
,
if (GPUKind == llvm::AMDGPU::GK_NONE && !IsHIPHost)
return;
- StringRef CanonName = isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind)
- :
https://github.com/arsenm commented:
Can we land the infrastructure to allow linking of compiler-rt binaries without
the specifics for divide 128?
https://github.com/llvm/llvm-project/pull/71978
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
arsenm wrote:
> I may have mentioned a few times that I don't like function attributes
> controlling fast-math behaviors.
It doesn't control it, it's informative. You just get undefined behavior if you
end up calling mismatched mode functions.
It does control it in the AMDGPU entry point
@@ -1293,8 +1293,19 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder
,
!Subtarget.noBTIAtReturnTwice() &&
MF.getInfo()->branchTargetEnforcement())
Opc = AArch64::BLR_BTI;
- else
+ else {
+// For an intrinsic call (e.g. memset), use GOT
https://github.com/arsenm closed https://github.com/llvm/llvm-project/pull/68515
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
arsenm wrote:
> * Which value allows generating the "fastest" math code -- disregarding
> correctness? I'd assume that "dynamic" is least optimizable, "ieee" in the
> middle, and "preserve-sign" is likely to generate the "fastest" code?
This depends on the target and operations. For some
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/68515
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -1129,8 +1129,97 @@ struct BitTest {
static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
};
+
+// Returns the first convergence entry/loop/anchor instruction found in |BB|.
+// std::nullopt otherwise.
+std::optional getConvergenceToken(llvm::BasicBlock *BB)
{
+
@@ -1129,8 +1129,97 @@ struct BitTest {
static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
};
+
+// Returns the first convergence entry/loop/anchor instruction found in |BB|.
+// std::nullopt otherwise.
+std::optional getConvergenceToken(llvm::BasicBlock *BB)
{
+
@@ -1293,8 +1293,19 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder
,
!Subtarget.noBTIAtReturnTwice() &&
MF.getInfo()->branchTargetEnforcement())
Opc = AArch64::BLR_BTI;
- else
+ else {
+// For an intrinsic call (e.g. memset), use GOT
@@ -862,14 +862,18 @@ static void instrumentOneFunc(
auto Name = FuncInfo.FuncNameVar;
auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()),
FuncInfo.FunctionHash);
+ // Make sure that pointer to global is passed in with zero
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/80183
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -1025,6 +1025,26 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const
MachineFunction ,
OutStreamer->emitInt32(MFI->getNumSpilledVGPRs());
}
+// Helper function to add common PAL Metadata 3.0+
+static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD,
+
@@ -1127,10 +1131,16 @@ void AMDGPUAsmPrinter::emitPALFunctionMetadata(const
MachineFunction ) {
MD->setFunctionScratchSize(FnName, MFI.getStackSize());
const GCNSubtarget = MF.getSubtarget();
- // Set compute registers
- MD->setRsrc1(CallingConv::AMDGPU_CS,
-
arsenm wrote:
> I wonder if, instead, we should just have `-ffast-math` always downgrade
> `-fdenormal-fp-math=ieee` to `-fdenormal-fp-math=preserve-sign`, under the
> rationale of "you asked for fast math, and preserve-sign mode might let the
> compiler generate faster code"?
This could
arsenm wrote:
> > It looks reasonable to me, although I'm not really an AMDGPU person. /me
> > summons @arsenm ?
>
> AMDGPU backend relies on LLVM passes to translate printf at IR level.
For the OpenCL case only, not for HIP/OpenMP
https://github.com/llvm/llvm-project/pull/68515
@@ -0,0 +1,21 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -disable-llvm-optzns
-mprintf-kind=hostcall -fno-builtin-printf -fcuda-is-device \
+// RUN: -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/80303
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/79980
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
arsenm wrote:
> LGTM. Please update PR title before merging
So this was only supposed to add the test, or implement this too?
https://github.com/llvm/llvm-project/pull/71019
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/71019
>From 2477ae87e7bb82b4551e42b8255dfe93dadff453 Mon Sep 17 00:00:00 2001
From: Pravin Jagtap
Date: Thu, 2 Nov 2023 01:05:35 -0400
Subject: [PATCH 1/6] [AMDGPU] Add code model (#70760) test for amdgpu target.
---
@@ -4,13 +4,10 @@
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature
-wavefrontsize64 -verify -S -o - %s
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -verify -S -o - %s
+// expected-no-diagnostics
+
typedef unsigned long ulong;
void
@@ -151,7 +151,7 @@ BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiWUiUiV4Ui",
"nc")
//===--===//
TARGET_BUILTIN(__builtin_amdgcn_ballot_w32, "ZUib", "nc", "wavefrontsize32")
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/80102
>From b64f7ba4afc6cbb3e5e34757e6979a0d5ee73e2b Mon Sep 17 00:00:00 2001
From: Sameer Sahasrabuddhe
Date: Tue, 30 Jan 2024 11:26:53 +0530
Subject: [PATCH] [AMDGPU] Every convergent operation needs post-isel
@@ -175,6 +175,8 @@ Predefined Macros
- Defined when the GPU default stream is set to per-thread mode.
* - ``HIP_API_PER_THREAD_DEFAULT_STREAM``
- Alias to ``__HIP_API_PER_THREAD_DEFAULT_STREAM__``. Deprecated.
+ * - ``__AMDGCN_WAVEFRONT_SIZE__``
@@ -25,4 +25,4 @@ entry:
}
!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
+!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
arsenm wrote:
Separate would be better
https://github.com/llvm/llvm-project/pull/79905
https://github.com/arsenm commented:
Also should get a run line that errors due to wavesize?
https://github.com/llvm/llvm-project/pull/79980
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
@@ -1293,8 +1293,19 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder
,
!Subtarget.noBTIAtReturnTwice() &&
MF.getInfo()->branchTargetEnforcement())
Opc = AArch64::BLR_BTI;
- else
+ else {
+// For an intrinsic call (e.g. memset), use GOT
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/79905
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/79795
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -2561,6 +2567,70 @@ bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const
SIMemOpInfo ,
return Changed;
}
+bool SIMemoryLegalizer::GFX9InsertWaitcntForPreciseMem(MachineFunction ) {
+ const GCNSubtarget = MF.getSubtarget();
+ const SIInstrInfo *TII =
@@ -2561,6 +2567,70 @@ bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const
SIMemOpInfo ,
return Changed;
}
+bool SIMemoryLegalizer::GFX9InsertWaitcntForPreciseMem(MachineFunction ) {
arsenm wrote:
can you just make this happen as a consequence of the
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/74056
>From 9be777d5b39852cf3c0b2538fd5f712922672caa Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 1 Dec 2023 18:00:13 +0900
Subject: [PATCH 1/2] Reapply "InstCombine: Introduce
SimplifyDemandedUseFPClass""
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/74056
>From 9be777d5b39852cf3c0b2538fd5f712922672caa Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 1 Dec 2023 18:00:13 +0900
Subject: [PATCH] Reapply "InstCombine: Introduce SimplifyDemandedUseFPClass""
https://github.com/arsenm closed https://github.com/llvm/llvm-project/pull/66522
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -2641,8 +2641,8 @@ define float @assume_false_smallest_normal(float %arg) {
}
define float @clamp_false_nan(float %arg) {
-; CHECK-LABEL: define float @clamp_false_nan(
-; CHECK-SAME: float returned [[ARG:%.*]]) #[[ATTR2]] {
+; CHECK-LABEL: define nofpclass(nan inf nzero
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/66522
>From 076ab2374d84c4112e0bf3fb11ecda2f5774785e Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Mon, 11 Sep 2023 10:56:40 +0300
Subject: [PATCH 1/7] ValueTracking: Merge fcmpImpliesClass and fcmpToClassTest
501 - 600 of 1174 matches
Mail list logo