@@ -5908,7 +5908,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl
GD, unsigned BuiltinID,
}
}
-assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
+assert(ArgValue->getType()->canLosslesslyBitCastTo(PTy) &&
--
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/81083
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -167,6 +167,10 @@ def FeatureCuMode : SubtargetFeature<"cumode",
"Enable CU wavefront execution mode"
>;
+def FeaturePreciseMemory
arsenm wrote:
The subtarget feature prefix should be removed. The subtarget feature name is
not the user-facing component.
@@ -0,0 +1,199 @@
+; Testing the -amdgpu-precise-memory-op option
+; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=+amdgpu-precise-memory-op
-verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX9
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -mattr=+amdgpu-precise-memory-op
-v
https://github.com/arsenm commented:
I think this needs codegen tests for the gfx900 vs. gfx906 mad_mix/fma_mix
issue
https://github.com/llvm/llvm-project/pull/76955
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-b
@@ -2819,11 +2819,11 @@ def int_amdgcn_fdot2_f16_f16 :
def int_amdgcn_fdot2_bf16_bf16 :
ClangBuiltin<"__builtin_amdgcn_fdot2_bf16_bf16">,
DefaultAttrsIntrinsic<
-[llvm_i16_ty], // %r
+[llvm_bfloat_ty], // %r
arsenm wrote:
Changing the clang bui
@@ -2835,8 +2835,8 @@ def int_amdgcn_fdot2_f32_bf16 :
DefaultAttrsIntrinsic<
[llvm_float_ty], // %r
[
- llvm_v2i16_ty, // %a
- llvm_v2i16_ty, // %b
+ llvm_v2bf16_ty, // %a
+ llvm_v2bf16_ty, // %b
arsenm wrote:
For potential revert
@@ -1562,8 +1562,9 @@ bool IRTranslator::translateBitCast(const User &U,
bool IRTranslator::translateCast(unsigned Opcode, const User &U,
MachineIRBuilder &MIRBuilder) {
- if (U.getType()->getScalarType()->isBFloatTy() ||
- U.getOperand(0
@@ -0,0 +1,8 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck %s
+
+v_dot2_bf16_bf16 v5, v1, v2, 100.0
arsenm wrote:
does this help with #79369 at all?
http
@@ -1562,8 +1562,9 @@ bool IRTranslator::translateBitCast(const User &U,
bool IRTranslator::translateCast(unsigned Opcode, const User &U,
MachineIRBuilder &MIRBuilder) {
- if (U.getType()->getScalarType()->isBFloatTy() ||
- U.getOperand(0
arsenm wrote:
Next piece in #81108
https://github.com/llvm/llvm-project/pull/74056
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm created
https://github.com/llvm/llvm-project/pull/81108
This completes the unrevert of ef388334ee5a3584255b9ef5b3fefdb244fa3fd7.
>From 7b5b50597e13c647ec70beab35dcc9b643bff42f Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Thu, 8 Feb 2024 14:15:33 +0530
Subject:
https://github.com/arsenm closed https://github.com/llvm/llvm-project/pull/74056
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/74056
>From 9be777d5b39852cf3c0b2538fd5f712922672caa Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 1 Dec 2023 18:00:13 +0900
Subject: [PATCH 1/4] Reapply "InstCombine: Introduce
SimplifyDemandedUseFPClass""
@@ -0,0 +1,273 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-cpu x86-64-v4
-std=c23 -O1 -ffreestanding -emit-llvm -o - %s | FileCheck %s
+
+// Th
@@ -0,0 +1,117 @@
+// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -Wno-varargs -O1
-disable-llvm-passes -emit-llvm -o - %s | opt --passes=instcombine | opt
-passes="expand-variadics,default" -S | FileCheck %s
--check-prefixes=CHECK,X86Linux
arsenm wrote:
ca
@@ -0,0 +1,589 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
UTC_ARGS: -p --function-signature
+; RUN: opt -S --passes=expand-variadics < %s | FileCheck %s
+target datalayout =
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:
@@ -0,0 +1,698 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache
@@ -0,0 +1,698 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache
@@ -0,0 +1,698 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache
@@ -0,0 +1,589 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
UTC_ARGS: -p --function-signature
+; RUN: opt -S --passes=expand-variadics < %s | FileCheck %s
+target datalayout =
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:
@@ -0,0 +1,698 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache
@@ -0,0 +1,589 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
UTC_ARGS: -p --function-signature
+; RUN: opt -S --passes=expand-variadics < %s | FileCheck %s
+target datalayout =
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:
@@ -0,0 +1,698 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache
@@ -0,0 +1,698 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache
@@ -0,0 +1,698 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache
@@ -0,0 +1,698 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache
@@ -0,0 +1,698 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache
@@ -1877,3 +1877,139 @@ Value
*InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
return MadeChange ? I : nullptr;
}
+
+/// For floating-point classes that resolve to a single bit pattern, return
that
+/// value.
+static Constant *getFPClassConstant(Type *Ty, FPClassTe
@@ -1877,3 +1877,139 @@ Value
*InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
return MadeChange ? I : nullptr;
}
+
+/// For floating-point classes that resolve to a single bit pattern, return
that
+/// value.
+static Constant *getFPClassConstant(Type *Ty, FPClassTe
@@ -1877,3 +1877,139 @@ Value
*InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
return MadeChange ? I : nullptr;
}
+
+/// For floating-point classes that resolve to a single bit pattern, return
that
+/// value.
+static Constant *getFPClassConstant(Type *Ty, FPClassTe
@@ -1877,3 +1877,139 @@ Value
*InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
return MadeChange ? I : nullptr;
}
+
+/// For floating-point classes that resolve to a single bit pattern, return
that
+/// value.
+static Constant *getFPClassConstant(Type *Ty, FPClassTe
@@ -1877,3 +1877,139 @@ Value
*InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
return MadeChange ? I : nullptr;
}
+
+/// For floating-point classes that resolve to a single bit pattern, return
that
+/// value.
+static Constant *getFPClassConstant(Type *Ty, FPClassTe
arsenm wrote:
> I don't know why it fails:
>
> ```
> error: patch failed: llvm/lib/Transforms/InstCombine/InstCombineInternal.h:551
> error: llvm/lib/Transforms/InstCombine/InstCombineInternal.h: patch does not
> apply
> error: patch failed:
> llvm/lib/Transforms/InstCombine/InstCombineSimplif
arsenm wrote:
> @arsenm Can you rebase this patch first?
It was already fresh, I just re-merged again with no conflicts
https://github.com/llvm/llvm-project/pull/74056
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/74056
>From 9be777d5b39852cf3c0b2538fd5f712922672caa Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 1 Dec 2023 18:00:13 +0900
Subject: [PATCH 1/2] Reapply "InstCombine: Introduce
SimplifyDemandedUseFPClass""
@@ -285,6 +289,20 @@ void
NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
return false;
}
+
+llvm::Constant *
+NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
+
@@ -285,6 +289,20 @@ void
NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
return false;
}
+
+llvm::Constant *
+NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
+
arsenm wrote:
> Do you only set the register for kernel entries?
Yes, it's the pre-initialized state. Non-kernels can't be arbitrarily invoked
from the host
> Is the attribute ignored for other functions?
No, it's an informative attribute about what the mode is. The compiler isn't
trying t
arsenm wrote:
> > So, alternatively...we could just go with the simplest solution, and use
> > "ieee" as the default even under -ffast-math.
>
+1. There hasn't been a performance reason to use FTZ/DAZ since ~2011. Maybe
there's still a power benefit? But in that case you could still explicitl
arsenm wrote:
> @arsenm Are you suggesting that these should instead be a range of
> minimum/maximum number of workitems globally?
That's how all of the other attributes we already have do this.
amdgpu-waves-per-eu is a single min, max pair. Same with
amdgpu-flat-work-group-size
Although thi
https://github.com/arsenm requested changes to this pull request.
Is this redundant with #68515? Do we just need to add OpenCL test coverage?
https://github.com/llvm/llvm-project/pull/72554
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https:
arsenm wrote:
@dtcxzyw are you planning on a codegen patch to improve the backend handling?
https://github.com/llvm/llvm-project/pull/76338
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-com
arsenm wrote:
ping, I want to get this in and move to remove the flag
https://github.com/llvm/llvm-project/pull/74056
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm approved this pull request.
amdgpu parts lgtm (which could be split to a separate change from the ptx
change)
https://github.com/llvm/llvm-project/pull/78759
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lis
@@ -104,3 +106,14 @@ void fun() {
(void) b;
(void) var_host_only;
}
+
+extern __global__ void external_func();
+extern void* const external_dep[] = {
arsenm wrote:
Sounds broken that the behavior would differ between array and non-array?
https://github.c
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/67104
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm requested changes to this pull request.
One attribute
https://github.com/llvm/llvm-project/pull/79035
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm commented:
One attribute, with a range, would be better than two attributes. This is how
it is handled in the similar cases.
I also think this should be in terms of work items, not workgroups
https://github.com/llvm/llvm-project/pull/79035
___
@@ -520,6 +520,104 @@ Every processor supports every OS ABI (see
:ref:`amdgpu-os`) with the following
=== === = =
=== === ==
+Generic processors also exist.
---
@@ -156,6 +156,12 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() {
const GCNSubtarget &STM = MF->getSubtarget();
const Function &F = MF->getFunction();
+ // TODO: We're checking this late, would be nice to check it earlier.
+ if (STM.requiresCodeObjectV6() && CodeObje
@@ -139,10 +139,10 @@ bool
AMDGPURemoveIncompatibleFunctions::checkFunction(Function &F) {
const GCNSubtarget *ST =
static_cast(TM->getSubtargetImpl(F));
- // Check the GPU isn't generic. Generic is used for testing only
- // and we don't want this pass to interfere
@@ -279,13 +279,25 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions
&Opts,
if (GPUKind == llvm::AMDGPU::GK_NONE && !IsHIPHost)
return;
- StringRef CanonName = isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind)
-
https://github.com/arsenm commented:
Can we land the infrastructure to allow linking of compiler-rt binaries without
the specifics for divide 128?
https://github.com/llvm/llvm-project/pull/71978
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
arsenm wrote:
> I may have mentioned a few times that I don't like function attributes
> controlling fast-math behaviors.
It doesn't control it, it's informative. You just get undefined behavior if you
end up calling mismatched mode functions.
It does control it in the AMDGPU entry point func
@@ -1293,8 +1293,19 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder
&MIRBuilder,
!Subtarget.noBTIAtReturnTwice() &&
MF.getInfo()->branchTargetEnforcement())
Opc = AArch64::BLR_BTI;
- else
+ else {
+// For an intrinsic call (e.g. memset),
https://github.com/arsenm closed https://github.com/llvm/llvm-project/pull/68515
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
arsenm wrote:
> * Which value allows generating the "fastest" math code -- disregarding
> correctness? I'd assume that "dynamic" is least optimizable, "ieee" in the
> middle, and "preserve-sign" is likely to generate the "fastest" code?
This depends on the target and operations. For some funct
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/68515
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -1129,8 +1129,97 @@ struct BitTest {
static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
};
+
+// Returns the first convergence entry/loop/anchor instruction found in |BB|.
+// std::nullopt otherwise.
+std::optional getConvergenceToken(llvm::BasicBlock *BB)
{
+ for
@@ -1129,8 +1129,97 @@ struct BitTest {
static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
};
+
+// Returns the first convergence entry/loop/anchor instruction found in |BB|.
+// std::nullopt otherwise.
+std::optional getConvergenceToken(llvm::BasicBlock *BB)
{
+ for
@@ -1293,8 +1293,19 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder
&MIRBuilder,
!Subtarget.noBTIAtReturnTwice() &&
MF.getInfo()->branchTargetEnforcement())
Opc = AArch64::BLR_BTI;
- else
+ else {
+// For an intrinsic call (e.g. memset),
@@ -862,14 +862,18 @@ static void instrumentOneFunc(
auto Name = FuncInfo.FuncNameVar;
auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()),
FuncInfo.FunctionHash);
+ // Make sure that pointer to global is passed in with zero
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/80183
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -1025,6 +1025,26 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const
MachineFunction &MF,
OutStreamer->emitInt32(MFI->getNumSpilledVGPRs());
}
+// Helper function to add common PAL Metadata 3.0+
+static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD,
+
@@ -1127,10 +1131,16 @@ void AMDGPUAsmPrinter::emitPALFunctionMetadata(const
MachineFunction &MF) {
MD->setFunctionScratchSize(FnName, MFI.getStackSize());
const GCNSubtarget &ST = MF.getSubtarget();
- // Set compute registers
- MD->setRsrc1(CallingConv::AMDGPU_CS,
-
arsenm wrote:
> I wonder if, instead, we should just have `-ffast-math` always downgrade
> `-fdenormal-fp-math=ieee` to `-fdenormal-fp-math=preserve-sign`, under the
> rationale of "you asked for fast math, and preserve-sign mode might let the
> compiler generate faster code"?
This could also
arsenm wrote:
> > It looks reasonable to me, although I'm not really an AMDGPU person. /me
> > summons @arsenm ?
>
> AMDGPU backend relies on LLVM passes to translate printf at IR level.
For the OpenCL case only, not for HIP/OpenMP
https://github.com/llvm/llvm-project/pull/68515
_
@@ -0,0 +1,21 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -disable-llvm-optzns
-mprintf-kind=hostcall -fno-builtin-printf -fcuda-is-device \
+// RUN: -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emi
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/80303
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/79980
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
arsenm wrote:
> LGTM. Please update PR title before merging
So this was only supposed to add the test, or implement this too?
https://github.com/llvm/llvm-project/pull/71019
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.or
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/71019
>From 2477ae87e7bb82b4551e42b8255dfe93dadff453 Mon Sep 17 00:00:00 2001
From: Pravin Jagtap
Date: Thu, 2 Nov 2023 01:05:35 -0400
Subject: [PATCH 1/6] [AMDGPU] Add code model (#70760) test for amdgpu target.
---
@@ -4,13 +4,10 @@
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature
-wavefrontsize64 -verify -S -o - %s
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -verify -S -o - %s
+// expected-no-diagnostics
+
typedef unsigned long ulong;
void test_ba
@@ -151,7 +151,7 @@ BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiWUiUiV4Ui",
"nc")
//===--===//
TARGET_BUILTIN(__builtin_amdgcn_ballot_w32, "ZUib", "nc", "wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_ba
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/80102
>From b64f7ba4afc6cbb3e5e34757e6979a0d5ee73e2b Mon Sep 17 00:00:00 2001
From: Sameer Sahasrabuddhe
Date: Tue, 30 Jan 2024 11:26:53 +0530
Subject: [PATCH] [AMDGPU] Every convergent operation needs post-isel
proces
@@ -175,6 +175,8 @@ Predefined Macros
- Defined when the GPU default stream is set to per-thread mode.
* - ``HIP_API_PER_THREAD_DEFAULT_STREAM``
- Alias to ``__HIP_API_PER_THREAD_DEFAULT_STREAM__``. Deprecated.
+ * - ``__AMDGCN_WAVEFRONT_SIZE__``
@@ -25,4 +25,4 @@ entry:
}
!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
+!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
arsenm wrote:
Separate would be better
https://github.com/llvm/llvm-project/pull/79905
__
https://github.com/arsenm commented:
Also should get a run line that errors due to wavesize?
https://github.com/llvm/llvm-project/pull/79980
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-co
@@ -1293,8 +1293,19 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder
&MIRBuilder,
!Subtarget.noBTIAtReturnTwice() &&
MF.getInfo()->branchTargetEnforcement())
Opc = AArch64::BLR_BTI;
- else
+ else {
+// For an intrinsic call (e.g. memset),
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/79905
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/79795
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -2561,6 +2567,70 @@ bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const
SIMemOpInfo &MOI,
return Changed;
}
+bool SIMemoryLegalizer::GFX9InsertWaitcntForPreciseMem(MachineFunction &MF) {
+ const GCNSubtarget &ST = MF.getSubtarget();
+ const SIInstrInfo *TII = ST.get
@@ -2561,6 +2567,70 @@ bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const
SIMemOpInfo &MOI,
return Changed;
}
+bool SIMemoryLegalizer::GFX9InsertWaitcntForPreciseMem(MachineFunction &MF) {
arsenm wrote:
can you just make this happen as a consequence of
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/74056
>From 9be777d5b39852cf3c0b2538fd5f712922672caa Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 1 Dec 2023 18:00:13 +0900
Subject: [PATCH 1/2] Reapply "InstCombine: Introduce
SimplifyDemandedUseFPClass""
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/74056
>From 9be777d5b39852cf3c0b2538fd5f712922672caa Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 1 Dec 2023 18:00:13 +0900
Subject: [PATCH] Reapply "InstCombine: Introduce SimplifyDemandedUseFPClass""
This
https://github.com/arsenm closed https://github.com/llvm/llvm-project/pull/66522
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -2641,8 +2641,8 @@ define float @assume_false_smallest_normal(float %arg) {
}
define float @clamp_false_nan(float %arg) {
-; CHECK-LABEL: define float @clamp_false_nan(
-; CHECK-SAME: float returned [[ARG:%.*]]) #[[ATTR2]] {
+; CHECK-LABEL: define nofpclass(nan inf nzero su
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/66522
>From 076ab2374d84c4112e0bf3fb11ecda2f5774785e Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Mon, 11 Sep 2023 10:56:40 +0300
Subject: [PATCH 1/7] ValueTracking: Merge fcmpImpliesClass and fcmpToClassTest
--
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/66522
>From 076ab2374d84c4112e0bf3fb11ecda2f5774785e Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Mon, 11 Sep 2023 10:56:40 +0300
Subject: [PATCH 1/6] ValueTracking: Merge fcmpImpliesClass and fcmpToClassTest
--
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/66522
>From 076ab2374d84c4112e0bf3fb11ecda2f5774785e Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Mon, 11 Sep 2023 10:56:40 +0300
Subject: [PATCH 1/2] ValueTracking: Merge fcmpImpliesClass and fcmpToClassTest
--
arsenm wrote:
Not sure if we need additional negative tests for missing disjoints
https://github.com/llvm/llvm-project/pull/76997
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/76997
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -418,8 +418,10 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final :
public TargetInfo {
// value ~0.
uint64_t getNullPointerValue(LangAS AS) const override {
// FIXME: Also should handle region.
-return (AS == LangAS::opencl_local || AS == LangAS::opencl_pr
@@ -2601,67 +2601,73 @@ def int_amdgcn_ds_bvh_stack_rtn :
[ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree]
>;
+def int_amdgcn_s_wait_event_export_ready :
+ ClangBuiltin<"__builtin_amdgcn_s_wait_event_export_ready">,
+ Intrinsic<[], [], [IntrNoMem, IntrHasSideEffec
Mirko Brkušanin,
Mirko Brkušanin, Mirko Brkusanin
,Mariusz Sikora
Message-ID:
In-Reply-To:
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/78414
___
cfe-commits mailing list
cfe-commits@lists.llvm.o
Mirko Brkušanin,
Mirko Brkušanin, Mirko Brkusanin
,Mariusz Sikora
Message-ID:
In-Reply-To:
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/78414
___
cfe-commits mail
Mirko Brkušanin,
Mirko Brkušanin, Mirko Brkusanin
,Mariusz Sikora
Message-ID:
In-Reply-To:
@@ -8770,6 +8781,22 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const
OperandVector &Operands,
}
}
+int VdstInIdx = AMDGPU::getNamedOper
@@ -2601,67 +2601,73 @@ def int_amdgcn_ds_bvh_stack_rtn :
[ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree]
>;
+def int_amdgcn_s_wait_event_export_ready :
+ ClangBuiltin<"__builtin_amdgcn_s_wait_event_export_ready">,
+ Intrinsic<[], [], [IntrNoMem, IntrHasSideEffec
arsenm wrote:
Should get a mention in the release notes
https://github.com/llvm/llvm-project/pull/79038
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
801 - 900 of 1471 matches
Mail list logo