@@ -116,8 +116,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public
TargetInfo {
}
BuiltinVaListKind getBuiltinVaListKind() const override {
-// FIXME: implement
-return TargetInfo::CharPtrBuiltinVaList;
+return TargetInfo::VoidPtrBuiltinVaList;
@@ -54,7 +54,34 @@ class MockArgList {
}
template LIBC_INLINE T next_var() {
-++arg_counter;
+arg_counter++;
+return T(arg_counter);
+ }
+
+ size_t read_count() const { return arg_counter; }
+};
+
+// Used by the GPU implementation to parse how many bytes
@@ -54,7 +54,34 @@ class MockArgList {
}
template LIBC_INLINE T next_var() {
-++arg_counter;
+arg_counter++;
+return T(arg_counter);
+ }
+
+ size_t read_count() const { return arg_counter; }
+};
+
+// Used by the GPU implementation to parse how many bytes
@@ -54,7 +54,34 @@ class MockArgList {
}
template LIBC_INLINE T next_var() {
-++arg_counter;
+arg_counter++;
+return T(arg_counter);
+ }
+
+ size_t read_count() const { return arg_counter; }
+};
+
+// Used by the GPU implementation to parse how many bytes
@@ -942,6 +942,36 @@ struct Amdgpu final : public VariadicABIInfo {
}
};
+struct NVPTX final : public VariadicABIInfo {
+
+ bool enableForTarget() override { return true; }
+
+ bool vaListPassedInSSARegister() override { return true; }
+
+ Type *vaListType(LLVMContext )
@@ -942,6 +942,36 @@ struct Amdgpu final : public VariadicABIInfo {
}
};
+struct NVPTX final : public VariadicABIInfo {
+
+ bool enableForTarget() override { return true; }
+
+ bool vaListPassedInSSARegister() override { return true; }
+
+ Type *vaListType(LLVMContext )
https://github.com/JonChesterfield edited
https://github.com/llvm/llvm-project/pull/96369
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -942,6 +942,36 @@ struct Amdgpu final : public VariadicABIInfo {
}
};
+struct NVPTX final : public VariadicABIInfo {
+
+ bool enableForTarget() override { return true; }
+
+ bool vaListPassedInSSARegister() override { return true; }
+
+ Type *vaListType(LLVMContext )
@@ -215,7 +219,10 @@ void NVPTXABIInfo::computeInfo(CGFunctionInfo ) const {
RValue NVPTXABIInfo::EmitVAArg(CodeGenFunction , Address VAListAddr,
QualType Ty, AggValueSlot Slot) const {
- llvm_unreachable("NVPTX does not support varargs");
+
JonChesterfield wrote:
I've passed some types to nvcc on godbolt and tried to decode the results. It
looks like it's passing everything with natural alignment, flattened, with
total disregard to the minimum slot size premise clang uses.
https://github.com/llvm/llvm-project/pull/96369
jhuber6 wrote:
> > You could theoretically break this if you didn't go through the C ABI and
> > ignored type promotion, but I'm not concerned with that kind of misuse
> > since it's against the ABI in the first place.
>
> The IR has its own ABI that may or may not match whatever the platform
arsenm wrote:
> You could theoretically break this if you didn't go through the C ABI and
> ignored type promotion, but I'm not concerned with that kind of misuse since
> it's against the ABI in the first place.
The IR has its own ABI that may or may not match whatever the platform "C ABI"
jhuber6 wrote:
> The nvptx lowering looks dubious - values smaller than slot size should be
> passed with the same alignment as the slot and presently aren't. A struct
> containing i8, i16 or half should be miscompiled on nvptx as written.
I mentioned this in the original patch, it's correct
@@ -942,6 +942,36 @@ struct Amdgpu final : public VariadicABIInfo {
}
};
+struct NVPTX final : public VariadicABIInfo {
+
+ bool enableForTarget() override { return true; }
+
+ bool vaListPassedInSSARegister() override { return true; }
+
+ Type *vaListType(LLVMContext )
https://github.com/JonChesterfield requested changes to this pull request.
The amdgpu patch is incorrect, see
https://github.com/llvm/llvm-project/pull/96370/
The nvptx lowering looks dubious - values smaller than slot size should be
passed with the same alignment as the slot and presently
@@ -942,6 +942,36 @@ struct Amdgpu final : public VariadicABIInfo {
}
};
+struct NVPTX final : public VariadicABIInfo {
+
+ bool enableForTarget() override { return true; }
+
+ bool vaListPassedInSSARegister() override { return true; }
+
+ Type *vaListType(LLVMContext )
@@ -942,6 +942,36 @@ struct Amdgpu final : public VariadicABIInfo {
}
};
+struct NVPTX final : public VariadicABIInfo {
+
+ bool enableForTarget() override { return true; }
+
+ bool vaListPassedInSSARegister() override { return true; }
+
+ Type *vaListType(LLVMContext )
@@ -0,0 +1,77 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -emit-llvm -o - %s | FileCheck
%s
+
+extern void varargs_simple(int, ...);
+
+// CHECK-LABEL: define dso_local
https://github.com/michaelrj-google approved this pull request.
LGTM from the libc side
https://github.com/llvm/llvm-project/pull/96369
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -0,0 +1,77 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -emit-llvm -o - %s | FileCheck
%s
+
+extern void varargs_simple(int, ...);
+
+// CHECK-LABEL: define dso_local
@@ -0,0 +1,77 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -emit-llvm -o - %s | FileCheck
%s
+
+extern void varargs_simple(int, ...);
+
+// CHECK-LABEL: define dso_local
@@ -0,0 +1,77 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -emit-llvm -o - %s | FileCheck
%s
+
+extern void varargs_simple(int, ...);
+
+// CHECK-LABEL: define dso_local
@@ -54,7 +54,8 @@ class MockArgList {
}
template LIBC_INLINE T next_var() {
-++arg_counter;
+arg_counter =
jhuber6 wrote:
Done
https://github.com/llvm/llvm-project/pull/96369
___
cfe-commits mailing
@@ -54,7 +54,8 @@ class MockArgList {
}
template LIBC_INLINE T next_var() {
-++arg_counter;
+arg_counter =
michaelrj-google wrote:
I don't think anything directly depends on its output, but also the comments on
it specifically say that it's for
@@ -54,7 +54,8 @@ class MockArgList {
}
template LIBC_INLINE T next_var() {
-++arg_counter;
+arg_counter =
jhuber6 wrote:
I didn't see any tests that actively depended on this value, and figured that
it does a similar job stating how many bytes
@@ -54,7 +54,8 @@ class MockArgList {
}
template LIBC_INLINE T next_var() {
-++arg_counter;
+arg_counter =
michaelrj-google wrote:
in that case it'd be better to create a new `ArgList` that is intended to count
sizes instead of just modifying
@@ -0,0 +1,73 @@
+//===--- GPU helper functions for printf using RPC
===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier:
@@ -0,0 +1,73 @@
+//===--- GPU helper functions for printf using RPC
===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier:
@@ -54,7 +54,8 @@ class MockArgList {
}
template LIBC_INLINE T next_var() {
-++arg_counter;
+arg_counter =
jhuber6 wrote:
I now use the `MockArgList` to determine how big the "struct" needs to be
to contain the arguments.
https://github.com/michaelrj-google commented:
I thought I submitted these comments already, oops
https://github.com/llvm/llvm-project/pull/96369
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
@@ -54,7 +54,8 @@ class MockArgList {
}
template LIBC_INLINE T next_var() {
-++arg_counter;
+arg_counter =
michaelrj-google wrote:
why was this change made?
https://github.com/llvm/llvm-project/pull/96369
@@ -0,0 +1,73 @@
+//===--- GPU helper functions for printf using RPC
===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier:
https://github.com/michaelrj-google edited
https://github.com/llvm/llvm-project/pull/96369
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
nickdesaulniers wrote:
> Do we want some sort of optimization for constant printf? 99% of the time, we
> could parse the string at compile-time. (This sort of optimization is common
> for embedded targets.)
Yes! @michaelrj-google has some ideas. Orthogonal to this PR though.
jhuber6 wrote:
Also, I just merged the prerequisite patches into this; to see the relevant
changes, just look at the most recent commit. The lack of stacked PRs in GitHub
really irks me.
https://github.com/llvm/llvm-project/pull/96369
___
cfe-commits
jhuber6 wrote:
> Do we want some sort of optimization for constant printf? 99% of the time, we
> could parse the string at compile-time. (This sort of optimization is common
> for embedded targets.)
I was going to make a follow-up patch that simply skipped sending back the size
if there were
efriedma-quic wrote:
Do we want some sort of optimization for constant printf? 99% of the time, we
could parse the string at compile-time. (This sort of optimization is common
for embedded targets.)
If the format string isn't constant, is parsing the string on the GPU really
slower than
@@ -1671,6 +1671,7 @@ int main(int Argc, char **Argv) {
NewArgv.push_back(Arg->getValue());
for (const opt::Arg *Arg : Args.filtered(OPT_offload_opt_eq_minus))
NewArgv.push_back(Args.MakeArgString(StringRef("-") + Arg->getValue()));
+ llvm::errs() << "asdfasdf\n";
llvmbot wrote:
@llvm/pr-subscribers-libc
@llvm/pr-subscribers-clang
Author: Joseph Huber (jhuber6)
Changes
Summary:
This patch implements the `printf` family of functions on the GPU using
the new variadic support. This patch adapts the old handling in the
`rpc_fprintf` placeholder, but
https://github.com/jhuber6 created
https://github.com/llvm/llvm-project/pull/96369
Summary:
This patch implements the `printf` family of functions on the GPU using
the new variadic support. This patch adapts the old handling in the
`rpc_fprintf` placeholder, but adds an extra RPC call to get
40 matches
Mail list logo