@@ -3172,8 +3172,8 @@ def int_amdgcn_loop : Intrinsic<[llvm_i1_ty],
[llvm_anyint_ty], [IntrWillReturn, IntrNoCallback, IntrNoFree]
>;
-def int_amdgcn_end_cf : Intrinsic<[], [llvm_anyint_ty],
- [IntrWillReturn, IntrNoCallback, IntrNoFree]>;
+def int_amdgcn_wave_reconverge :
@@ -15740,6 +15740,32 @@ void
SITargetLowering::finalizeLowering(MachineFunction &MF) const {
}
}
+ // ISel inserts copy to regs for the successor PHIs
+ // at the BB end. We need to move the SI_WAVE_RECONVERGE right before the
+ // branch.
+ for (auto &MBB : MF) {
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/92809
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm requested changes to this pull request.
There are quite a few code quality regressions and XFAILed tests. The
description needs more elaboration on what the strategy is here.
https://github.com/llvm/llvm-project/pull/92809
_
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -1764,6 +1764,13 @@ class TargetInfo : public TransferrableTargetInfo,
return 0;
}
+ /// \returns Target specific flat ptr address space; a flat ptr is a ptr that
+ /// can be casted to / from all other target address spaces. If the target
+ /// exposes no such add
@@ -0,0 +1,86 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 5
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL1.2 -emit-llvm
-o - | FileCheck --check-prefix=OPENCL12 %s
+// RUN: %clang_cc1 %s -O0 -triple amdg
@@ -0,0 +1,84 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --function-signature
+ // REQUIRES: amdgpu-registered-target
+ // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde
-emit-llvm -o - %s | FileCheck %s
+ // RUN
@@ -0,0 +1,84 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --function-signature
+ // REQUIRES: amdgpu-registered-target
+ // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde
-emit-llvm -o - %s | FileCheck %s
+ // RUN
@@ -0,0 +1,9 @@
+
arsenm wrote:
Extra blank line
https://github.com/llvm/llvm-project/pull/94830
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -0,0 +1,17 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -fsyntax-only -verify -std=gnu++11 -triple amdgcn
-Wno-unused-value %s
+
arsenm wrote:
We probably want another similar sema test for OpenCL/HIP/OpenMP
https://github.com/llvm/llvm-pro
@@ -1764,6 +1764,13 @@ class TargetInfo : public TransferrableTargetInfo,
return 0;
}
+ /// \returns Target specific flat ptr address space; a flat ptr is a ptr that
+ /// can be casted to / from all other target address spaces. If the target
+ /// exposes no such add
@@ -0,0 +1,86 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 5
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL1.2 -emit-llvm
-o - | FileCheck --check-prefix=OPENCL12 %s
+// RUN: %clang_cc1 %s -O0 -triple amdg
@@ -0,0 +1,86 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 5
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL1.2 -emit-llvm
-o - | FileCheck --check-prefix=OPENCL12 %s
+// RUN: %clang_cc1 %s -O0 -triple amdg
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/95395
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -0,0 +1,65 @@
+; RUN: llc -stop-after=amdgpu-isel -mtriple=amdgcn-- -mcpu=gfx1100
-verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK,ISEL %s
+
+; CHECK-LABEL: name:basic_readfirstlane_i64
+; CHECK:[[TOKEN:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL
@@ -0,0 +1,21 @@
+//===-- AMDGPUTypes.def - Metadata about AMDGPU types ---*- C++
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apa
@@ -0,0 +1,84 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --function-signature
+ // REQUIRES: amdgpu-registered-target
+ // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde
-emit-llvm -o - %s | FileCheck %s
+ // RUN
@@ -0,0 +1,21 @@
+//===-- AMDGPUTypes.def - Metadata about AMDGPU types ---*- C++
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apa
@@ -0,0 +1,65 @@
+; RUN: llc -stop-after=amdgpu-isel -mtriple=amdgcn-- -mcpu=gfx1100
-verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK,ISEL %s
+
+; CHECK-LABEL: name:basic_readfirstlane_i64
+; CHECK:[[TOKEN:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL
@@ -6129,13 +6150,55 @@ static SDValue lowerLaneOp(const SITargetLowering &TLI,
SDNode *N,
if (ValSize % 32 != 0)
return SDValue();
+ auto unrollLaneOp = [&DAG, &SL](SDNode *N) -> SDValue {
+EVT VT = N->getValueType(0);
+unsigned NE = VT.getVectorNumElements();
@@ -0,0 +1,65 @@
+; RUN: llc -stop-after=amdgpu-isel -mtriple=amdgcn-- -mcpu=gfx1100
-verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK,ISEL %s
+
+; CHECK-LABEL: name:basic_readfirstlane_i64
+; CHECK:[[TOKEN:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL
@@ -0,0 +1,69 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+ // REQUIRES: amdgpu-registered-target
+ // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde
-emit-llvm -o - %s | FileCheck %s
+ // RUN: %clang_cc1 -triple amdgcn-unkn
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/95373
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -1125,6 +1125,22 @@ void Clang::AddPreprocessingOptions(Compilation &C,
const JobAction &JA,
CmdArgs.push_back("__clang_openmp_device_functions.h");
}
+ if (Args.hasArg(options::OPT_foffload_via_llvm)) {
+// Add llvm_wrappers/* to our system include path. This
@@ -1125,6 +1125,22 @@ void Clang::AddPreprocessingOptions(Compilation &C,
const JobAction &JA,
CmdArgs.push_back("__clang_openmp_device_functions.h");
}
+ if (Args.hasArg(options::OPT_foffload_via_llvm)) {
+// Add llvm_wrappers/* to our system include path. This
arsenm wrote:
> Just a note - and maybe this was already discussed above - is there good
> reason not to explicitly make this type a 128-bit scalar? The LLVM data
> layout already does this
I thought this was the 160 bit version?
Can we have an opaque-but-sized type? The concern is exposing
@@ -0,0 +1,95 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -target-cpu
verde -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple
arsenm wrote:
> I understand the chance of conflict is low. It may be like the chance of
> hitting by a meteor. However, if we prefix with `__amdgcn_`, there is no such
> risk. And we have the benefit to clearly indicate it is an amdgcn
> target-specific type.
Should use amdgpu
https://githu
@@ -0,0 +1,95 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -target-cpu
verde -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple
@@ -0,0 +1,14 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde -emit-llvm
-o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown
@@ -128,12 +128,13 @@ enum class CudaArch {
GFX12_GENERIC,
GFX1200,
GFX1201,
+ AMDGCNSPIRV,
Generic, // A processor model named 'generic' if the target backend defines a
// public one.
LAST,
CudaDefault = CudaArch::SM_52,
- HIPDefault = CudaArch::
arsenm wrote:
> Or drop the new nodes altogether and legalize to intrinsics directly?
That's another option. The only real plus to the intermediate is it's slightly
less annoying to write combines for. But there are limited combining
opportunities for these
https://github.com/llvm/llvm-p
@@ -0,0 +1,46 @@
+# RUN: not --crash llc -mtriple=amdgcn -run-pass=none -verify-machineinstrs -o
/dev/null %s 2>&1 | FileCheck %s
arsenm wrote:
I'd still test all 3, but yes an IR test
https://github.com/llvm/llvm-project/pull/89217
___
@@ -0,0 +1,46 @@
+# RUN: not --crash llc -mtriple=amdgcn -run-pass=none -verify-machineinstrs -o
/dev/null %s 2>&1 | FileCheck %s
arsenm wrote:
You should not need to introduce any new machine verifier tests, they are not
useful. The useful test would be the IR
@@ -2201,6 +2207,9 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const
{
Align = 8;
\
break;
#include "clang/Basic/WebAssemblyReferenceTypes.def"
+case BuiltinType::AMDGPUBufferRsrc:
+ W
@@ -0,0 +1,11 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -fsyntax-only -verify -triple amdgcn -Wno-unused-value %s
+
+void foo() {
+ int n = 100;
+ __buffer_rsrc_t v = 0; // expected-error {{cannot initialize a variable of
type '__buffer_rsrc_t' with an rvalu
@@ -0,0 +1,11 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -fsyntax-only -verify -triple amdgcn -Wno-unused-value %s
+
+void foo() {
+ int n = 100;
+ __buffer_rsrc_t v = 0; // expected-error {{cannot initialize a variable of
type '__buffer_rsrc_t' with an rvalu
@@ -2200,6 +2206,9 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const
{
Align = 8;
\
break;
#include "clang/Basic/WebAssemblyReferenceTypes.def"
+case BuiltinType::AMDGPUBufferRsrc:
+ W
@@ -0,0 +1,30 @@
+// RUN: %clang++ -foffload-via-llvm --offload-arch=native %s -o %t
+// RUN: %t | %fcheck-generic
+
+// UNSUPPORTED: aarch64-unknown-linux-gnu
+// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+// UNSUPPORTED: x86_64-pc-linux-gnu
+// UNSUPPORTED: x86_64-pc-linux-gnu-
@@ -0,0 +1,9 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple amdgcn %s -emit-llvm -o -
| FileCheck %s
arsenm wrote:
Why do you need -fclang-abi-compat=latest
https://github.com/llvm/llvm-project/pull/94830
___
@@ -0,0 +1,21 @@
+//===-- AMDGPUTypes.def - Metadata about AMDGPU types ---*- C++
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apa
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/94830
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -2200,6 +2206,9 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const
{
Align = 8;
\
break;
#include "clang/Basic/WebAssemblyReferenceTypes.def"
+case BuiltinType::AMDGPUBufferRsrc:
+ W
https://github.com/arsenm commented:
Need stacked PR that adds the make_buffer_rsrc builtin that shows its use
https://github.com/llvm/llvm-project/pull/94830
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailm
@@ -1091,6 +1091,9 @@ enum PredefinedTypeIDs {
// \brief WebAssembly reference types with auto numeration
#define WASM_TYPE(Name, Id, SingletonId) PREDEF_TYPE_##Id##_ID,
#include "clang/Basic/WebAssemblyReferenceTypes.def"
+// \breif AMDGPU types with auto numeration
--
@@ -16055,6 +16145,90 @@ of the two arguments. -0.0 is considered to be less
than +0.0 for this
intrinsic. Note that these are the semantics specified in the draft of
IEEE 754-2019.
+.. _i_minimumnum:
+
+'``llvm.minimumnum.*``' Intrinsic
+^
+
+
@@ -16055,6 +16145,90 @@ of the two arguments. -0.0 is considered to be less
than +0.0 for this
intrinsic. Note that these are the semantics specified in the draft of
IEEE 754-2019.
+.. _i_minimumnum:
+
+'``llvm.minimumnum.*``' Intrinsic
+^
+
+
@@ -15874,6 +15874,96 @@ The returned value is completely identical to the
input except for the sign bit;
in particular, if the input is a NaN, then the quiet/signaling bit and payload
are perfectly preserved.
+.. _i_fminmax_family:
+
+'``llvm.min.*``' Intrinsics Comparation
@@ -15874,6 +15874,96 @@ The returned value is completely identical to the
input except for the sign bit;
in particular, if the input is a NaN, then the quiet/signaling bit and payload
are perfectly preserved.
+.. _i_fminmax_family:
+
+'``llvm.min.*``' Intrinsics Comparation
@@ -16055,6 +16145,90 @@ of the two arguments. -0.0 is considered to be less
than +0.0 for this
intrinsic. Note that these are the semantics specified in the draft of
IEEE 754-2019.
+.. _i_minimumnum:
+
+'``llvm.minimumnum.*``' Intrinsic
+^
+
+
arsenm wrote:
> "aggregates" here might even be unusual cases like `<4 x i8>`
Vectors aren't aggregates and are more reasonable
https://github.com/llvm/llvm-project/pull/94576
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm
arsenm wrote:
> `voffset` and `soffset` are "offset that goes in VGPRs" and "offset that goes
> in SGPRs", with the latter having some different bounds-checking semantics on
> ... at least some of the gfx9's, IIRC.
>
Right, that's the problem. We need to know the parameters of the SRD in orde
arsenm wrote:
> 2. What I mean is that "types that work" isn't the right framing: any type
> can be legalized to one or more types that work. That is, down in the isel
> legalizer, if I call for, for example
>```llvm
>%0 = call {i64, i64, i8} @llvm.amdgcn.raw.buffer.ptr.load(ptr addrspa
arsenm wrote:
> 1. For the swizzled case, that's `struct.ptr.buffer.*`, and yeah, those will
> always need builtins because LLVM can't deal in 2D addressing schemes
But the raw buffer intrinsics have both the soffset and voffset parameters
though? Not just the struct
https://github.com/llv
arsenm wrote:
> Actually, even ignoring address space 7, it feels like these builtins if you
> could `raw.ptr.buffer.store` any type you liked, and then they could be
> type-varying in Clang?
We could either have a builtin for all the types that would work, or if we want
to treat them more li
@@ -68,6 +68,10 @@ enum class fltNonfiniteBehavior {
// `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
// encodings do not distinguish between signalling and quiet NaN.
NanOnly,
+
+ // This behavior is present in Float6E3M2FN and Float6E2M3FN types.
@@ -878,6 +896,10 @@ void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
for the significand. If double or longer, this is a signalling NaN,
which may not be ideal. If float, this is QNaN(0). */
void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/94751
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -205,7 +205,7 @@ class ToolChain {
/// Executes the given \p Executable and returns the stdout.
llvm::Expected>
- executeToolChainProgram(StringRef Executable) const;
+ executeToolChainProgram(StringRef Executable, unsigned Timeout = 0) const;
arsenm
@@ -1881,6 +1890,20 @@ TEST(APFloatTest, getSmallest) {
EXPECT_TRUE(test.isFiniteNonZero());
EXPECT_TRUE(test.isDenormal());
EXPECT_TRUE(test.bitwiseIsEqual(expected));
+
+ test = APFloat::getSmallest(APFloat::Float6E3M2FN(), false);
+ expected = APFloat(APFloat::Float6
@@ -47,6 +47,10 @@ static std::string convertToString(double d, unsigned Prec,
unsigned Pad,
return std::string(Buffer.data(), Buffer.size());
}
+static bool hasNanOrInf(APFloat::Semantics S) {
+ return (S != APFloat::S_Float6E3M2FN) && (S != APFloat::S_Float6E2M3FN);
+}
-
@@ -8642,8 +8642,11 @@ The '``llvm.used``' Global Variable
The ``@llvm.used`` global is an array which has
:ref:`appending linkage `. This array contains a list of
pointers to named global variables, functions and aliases which may optionally
-have a pointer cast formed of bitc
https://github.com/arsenm commented:
Is this redundant with #93601?
https://github.com/llvm/llvm-project/pull/93914
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
arsenm wrote:
Commit message also needs to be updated
https://github.com/llvm/llvm-project/pull/93601
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -2922,18 +2922,19 @@ static void emitUsed(CodeGenModule &CGM, StringRef Name,
if (List.empty())
return;
+ llvm::Type *UsedPtrTy = llvm::PointerType::getUnqual(CGM.getLLVMContext());
arsenm wrote:
Best to just use get(Ctx, 0)
https://github.com/llv
@@ -2922,18 +2922,19 @@ static void emitUsed(CodeGenModule &CGM, StringRef Name,
if (List.empty())
return;
+ llvm::Type *UsedPtrTy = llvm::PointerType::getUnqual(CGM.getLLVMContext());
+
// Convert List to what ConstantArray needs.
SmallVector UsedArray;
UsedAr
https://github.com/arsenm approved this pull request.
lgtm with nit
https://github.com/llvm/llvm-project/pull/93601
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/93601
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
arsenm wrote:
> If we do want addrspace(7), we'll need to expose `make.buffer.rsrc` and give
> it a `p7` variant probably.
Yes.
We probably should expose some kind of custom type instead of directly using a
C address_space(7) attribute
https://github.com/llvm/llvm-project/pull/94576
___
@@ -0,0 +1,19 @@
+; RUN: not --crash llc -stop-after=amdgpu-isel -mtriple=amdgcn-- -mcpu=gfx900
-verify-machineinstrs -o - %s 2>&1 | FileCheck %s
arsenm wrote:
This should also be repeated for all 3 intrinsics
https://github.com/llvm/llvm-project/pull/89217
__
https://github.com/arsenm requested changes to this pull request.
@jayfoad's testcase fails and the same test should be repeated for all 3
intrinsics
https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -0,0 +1,19 @@
+; RUN: not --crash llc -stop-after=amdgpu-isel -mtriple=amdgcn-- -mcpu=gfx900
-verify-machineinstrs -o - %s 2>&1 | FileCheck %s
arsenm wrote:
This is not an IR verifier test, it is a codegen test that fails the machine
verifier. A machine veri
https://github.com/arsenm approved this pull request.
https://github.com/llvm/llvm-project/pull/94376
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -0,0 +1,264 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde -emit-llvm
-o - %s | FileCheck %s --check-prefixes=VERDE
+// RUN: %clang_cc
@@ -0,0 +1,293 @@
+// REQUIRES: amdgpu-registered-target
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --function-signature
+// RUN: %clang_cc1 -cc1 -std=c23 -triple amdgcn-amd-amdhsa -emit-llvm -O1 %s
-o - | FileCheck %s
+
+void sink_0
arsenm wrote:
> @arsenm You're right about passing larger things indirectly. I'm intending to
> land this as-is, with the types inlined, as that unblocks #93362. I'm nervous
> that the extra pointer indirection will hit the same memory error that
> tweaking codegen in that patch hits (it's a s
@@ -0,0 +1,1037 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apach
@@ -0,0 +1,1037 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apach
@@ -0,0 +1,264 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde -emit-llvm
-o - %s | FileCheck %s --check-prefixes=VERDE
+// RUN: %clang_cc
arsenm wrote:
> Is there really a good use case for this? Can you use regular stores to
> addrspace(7) instead? @krzysz00
I see these regularly used via inline asm in various ML code. We need to expose
these in some way to stop people from doing that
>
> Also, do you really need a separate
https://github.com/arsenm commented:
Missing non-constant tests for each parameter?
https://github.com/llvm/llvm-project/pull/94376
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -197,12 +202,20 @@ ABIArgInfo
AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const {
return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
}
-ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty,
+ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty,
@@ -0,0 +1,293 @@
+// REQUIRES: amdgpu-registered-target
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --function-signature
+// RUN: %clang_cc1 -cc1 -std=c23 -triple amdgcn-amd-amdhsa -emit-llvm -O1 %s
-o - | FileCheck %s
+
+void sink_0
@@ -197,12 +202,20 @@ ABIArgInfo
AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const {
return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
}
-ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty,
+ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty,
@@ -32,27 +32,29 @@ class StoreInst;
/// These are the kinds of recurrences that we support.
enum class RecurKind {
- None, ///< Not a recurrence.
- Add, ///< Sum of integers.
- Mul, ///< Product of integers.
- Or, ///< Bitwise or logical OR of integers
arsenm wrote:
You should add the mentioned convergence-tokens.ll test function
https://github.com/llvm/llvm-project/pull/89217
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
arsenm wrote:
> Perhaps an alternative is to tweak LangRef wording to say that that these are
> always emitted as unqualified ptrs, and that their ephemeral nature implies
> that their AS is meaningless?
I think this is the correct way to handle it. Also we'll need a few
stripPointerCasts add
@@ -0,0 +1,1023 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apach
@@ -0,0 +1,1023 @@
+//===-- ExpandVariadicsPass.cpp *- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apach
arsenm wrote:
> The third argument here is like for llvm.used, it's a way to associate the
> entry with a global or function. If the corresponding global or function is
> omitted from the output then the entry will be removed. It isn't used for
> anything at run time. So I think there should b
@@ -2928,12 +2928,13 @@ static void emitUsed(CodeGenModule &CGM, StringRef Name,
for (unsigned i = 0, e = List.size(); i != e; ++i) {
UsedArray[i] =
llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
-cast(&*List[i]), CGM.Int8PtrTy);
---
arsenm wrote:
> I think the comments here are fed into #93362 successfully, will go through
> the list again to check.
So #93362 is the replacement, and not the sequential next piece? Can we close
this one then?
https://github.com/llvm/llvm-project/pull/89007
@@ -5005,8 +5007,11 @@ void computeKnownFPClass(const Value *V, const APInt
&DemandedElts,
// If either operand is not NaN, the result is not NaN.
if (NeverNaN && (IID == Intrinsic::minnum || IID == Intrinsic::maxnum))
Known.knownNot(fcNan);
+ if (Neve
@@ -16049,6 +16094,84 @@ of the two arguments. -0.0 is considered to be less
than +0.0 for this
intrinsic. Note that these are the semantics specified in the draft of
IEEE 754-2019.
+.. _i_minimumnum:
+
+'``llvm.minimumnum.*``' Intrinsic
+^
+
+
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/93841
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -3636,6 +3648,22 @@ def Fmin : FPMathTemplate, LibBuiltin<"math.h"> {
let OnlyBuiltinPrefixedAliasIsConstexpr = 1;
}
+def FmaximumNum : FPMathTemplate, LibBuiltin<"math.h"> {
arsenm wrote:
I'd prefer to split the clang changes into a separate change
ht
https://github.com/arsenm commented:
> 3. PowerPC: has some interaction with the behavior of `minnum/maxnum`: need
> define `fcanonicalize`.
AMDGPU has the same handling. This is to break the signaling nan handling from
IEEE to the broken old glibc libm behavior. If we fix the definition to ma
401 - 500 of 1520 matches
Mail list logo