[PATCH] D145238: [NVPTX] Expose LDU builtins

2023-03-15 Thread Jakub Chlanda via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG7258317bade0: [NVPTX] Expose LDU builtins (authored by 
jchlanda).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D145238/new/

https://reviews.llvm.org/D145238

Files:
  clang/include/clang/Basic/BuiltinsNVPTX.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-nvptx-native-half-type-err.c
  clang/test/CodeGen/builtins-nvptx-native-half-type.c
  clang/test/CodeGen/builtins-nvptx.c
  llvm/test/CodeGen/NVPTX/ldu-ldg.ll

Index: llvm/test/CodeGen/NVPTX/ldu-ldg.ll
===
--- llvm/test/CodeGen/NVPTX/ldu-ldg.ll
+++ llvm/test/CodeGen/NVPTX/ldu-ldg.ll
@@ -3,7 +3,13 @@
 
 
 declare i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align)
 declare i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align)
+declare <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align)
 
 declare i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
 declare i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align)
@@ -14,72 +20,114 @@
 declare half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align)
 declare <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align)
 
-; CHECK: test_ldu_i8
+; CHECK-LABEL: test_ldu_i8
 define i8 @test_ldu_i8(ptr addrspace(1) %ptr) {
-; ldu.global.u8
+  ; CHECK: ldu.global.u8
   %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
   ret i8 %val
 }
 
-; CHECK: test_ldu_i32
+; CHECK-LABEL: test_ldu_i16
+define i16 @test_ldu_i16(ptr addrspace(1) %ptr) {
+  ; CHECK: ldu.global.u16
+  %val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
+  ret i16 %val
+}
+
+; CHECK-LABEL: test_ldu_i32
 define i32 @test_ldu_i32(ptr addrspace(1) %ptr) {
-; ldu.global.u32
+  ; CHECK: ldu.global.u32
   %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
   ret i32 %val
 }
 
-; CHECK: test_ldg_i8
+; CHECK-LABEL: test_ldu_i64
+define i64 @test_ldu_i64(ptr addrspace(1) %ptr) {
+  ; CHECK: ldu.global.u64
+  %val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret i64 %val
+}
+
+; CHECK-LABEL: test_ldu_f32
+define float @test_ldu_f32(ptr addrspace(1) %ptr) {
+  ; CHECK: ldu.global.f32
+  %val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
+  ret float %val
+}
+
+; CHECK-LABEL: test_ldu_f64
+define double @test_ldu_f64(ptr addrspace(1) %ptr) {
+  ; CHECK: ldu.global.f64
+  %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret double %val
+}
+
+; CHECK-LABEL: test_ldu_f16
+define half @test_ldu_f16(ptr addrspace(1) %ptr) {
+  ; CHECK: ldu.global.b16
+  %val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
+  ret half %val
+}
+
+; CHECK-LABEL: test_ldu_v2f16
+define <2 x half> @test_ldu_v2f16(ptr addrspace(1) %ptr) {
+  ; CHECK: ldu.global.b32
+  %val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
+  ret <2 x half> %val
+}
+
+; CHECK-LABEL: test_ldg_i8
 define i8 @test_ldg_i8(ptr addrspace(1) %ptr) {
-; ld.global.nc.u8
+  ; CHECK: ld.global.nc.u8
   %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
   ret i8 %val
 }
 
-; CHECK: test_ldg_i16
+; CHECK-LABEL: test_ldg_i16
 define i16 @test_ldg_i16(ptr addrspace(1) %ptr) {
-; ld.global.nc.u16
-  %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 4)
+  ; CHECK: ld.global.nc.u16
+  %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
   ret i16 %val
 }
 
-; CHECK: test_ldg_i32
+; CHECK-LABEL: test_ldg_i32
 define i32 @test_ldg_i32(ptr addrspace(1) %ptr) {
-; ld.global.nc.u32
+  ; CHECK: ld.global.nc.u32
   %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
   ret i32 %val
 }
 
-; CHECK: test_ldg_i64
+; CHECK-LABEL: test_ldg_i64
 define i64 @test_ldg_i64(ptr addrspace(1) %ptr) {
-; ld.global.nc.u64
+  ; CHECK: ld.global.nc.u64
   %val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
   ret i64 %val
 }
 
-; CHECK: test_ldg_f32
+; CHECK-LABEL: test_ldg_f32
 define float @test_ldg_f32(ptr addrspace(1) %ptr) {
-; ld.global.nc.u64
+  ; CHECK: ld.global.nc.f32
   %val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) 

[PATCH] D145238: [NVPTX] Expose LDU builtins

2023-03-15 Thread Jakub Chlanda via Phabricator via cfe-commits
jchlanda updated this revision to Diff 505394.
jchlanda added a comment.

Use `CHECK-LABEL`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D145238/new/

https://reviews.llvm.org/D145238

Files:
  clang/include/clang/Basic/BuiltinsNVPTX.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-nvptx-native-half-type-err.c
  clang/test/CodeGen/builtins-nvptx-native-half-type.c
  clang/test/CodeGen/builtins-nvptx.c
  llvm/test/CodeGen/NVPTX/ldu-ldg.ll

Index: llvm/test/CodeGen/NVPTX/ldu-ldg.ll
===
--- llvm/test/CodeGen/NVPTX/ldu-ldg.ll
+++ llvm/test/CodeGen/NVPTX/ldu-ldg.ll
@@ -3,7 +3,13 @@
 
 
 declare i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align)
 declare i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align)
+declare <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align)
 
 declare i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
 declare i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align)
@@ -14,72 +20,114 @@
 declare half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align)
 declare <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align)
 
-; CHECK: test_ldu_i8
+; CHECK-LABEL: test_ldu_i8
 define i8 @test_ldu_i8(ptr addrspace(1) %ptr) {
-; ldu.global.u8
+  ; CHECK: ldu.global.u8
   %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
   ret i8 %val
 }
 
-; CHECK: test_ldu_i32
+; CHECK-LABEL: test_ldu_i16
+define i16 @test_ldu_i16(ptr addrspace(1) %ptr) {
+  ; CHECK: ldu.global.u16
+  %val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
+  ret i16 %val
+}
+
+; CHECK-LABEL: test_ldu_i32
 define i32 @test_ldu_i32(ptr addrspace(1) %ptr) {
-; ldu.global.u32
+  ; CHECK: ldu.global.u32
   %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
   ret i32 %val
 }
 
-; CHECK: test_ldg_i8
+; CHECK-LABEL: test_ldu_i64
+define i64 @test_ldu_i64(ptr addrspace(1) %ptr) {
+  ; CHECK: ldu.global.u64
+  %val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret i64 %val
+}
+
+; CHECK-LABEL: test_ldu_f32
+define float @test_ldu_f32(ptr addrspace(1) %ptr) {
+  ; CHECK: ldu.global.f32
+  %val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
+  ret float %val
+}
+
+; CHECK-LABEL: test_ldu_f64
+define double @test_ldu_f64(ptr addrspace(1) %ptr) {
+  ; CHECK: ldu.global.f64
+  %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret double %val
+}
+
+; CHECK-LABEL: test_ldu_f16
+define half @test_ldu_f16(ptr addrspace(1) %ptr) {
+  ; CHECK: ldu.global.b16
+  %val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
+  ret half %val
+}
+
+; CHECK-LABEL: test_ldu_v2f16
+define <2 x half> @test_ldu_v2f16(ptr addrspace(1) %ptr) {
+  ; CHECK: ldu.global.b32
+  %val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
+  ret <2 x half> %val
+}
+
+; CHECK-LABEL: test_ldg_i8
 define i8 @test_ldg_i8(ptr addrspace(1) %ptr) {
-; ld.global.nc.u8
+  ; CHECK: ld.global.nc.u8
   %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
   ret i8 %val
 }
 
-; CHECK: test_ldg_i16
+; CHECK-LABEL: test_ldg_i16
 define i16 @test_ldg_i16(ptr addrspace(1) %ptr) {
-; ld.global.nc.u16
-  %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 4)
+  ; CHECK: ld.global.nc.u16
+  %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
   ret i16 %val
 }
 
-; CHECK: test_ldg_i32
+; CHECK-LABEL: test_ldg_i32
 define i32 @test_ldg_i32(ptr addrspace(1) %ptr) {
-; ld.global.nc.u32
+  ; CHECK: ld.global.nc.u32
   %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
   ret i32 %val
 }
 
-; CHECK: test_ldg_i64
+; CHECK-LABEL: test_ldg_i64
 define i64 @test_ldg_i64(ptr addrspace(1) %ptr) {
-; ld.global.nc.u64
+  ; CHECK: ld.global.nc.u64
   %val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
   ret i64 %val
 }
 
-; CHECK: test_ldg_f32
+; CHECK-LABEL: test_ldg_f32
 define float @test_ldg_f32(ptr addrspace(1) %ptr) {
-; ld.global.nc.u64
+  ; CHECK: ld.global.nc.f32
   %val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
   ret float %val
 }
 
-; CHECK: test_ldg_f64
+; CHECK-LABEL: test_ldg_f64
 define double @test_ldg_f64(ptr 

[PATCH] D145238: [NVPTX] Expose LDU builtins

2023-03-14 Thread Artem Belevich via Phabricator via cfe-commits
tra accepted this revision.
tra added a comment.
This revision is now accepted and ready to land.

LGTM with a test nit.




Comment at: llvm/test/CodeGen/NVPTX/ldu-ldg.ll:30
 
+; CHECK: test_ldu_i16
+define i16 @test_ldu_i16(ptr addrspace(1) %ptr) {

Nit. Function names usually checked with `CHECK-LABEL`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D145238/new/

https://reviews.llvm.org/D145238

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D145238: [NVPTX] Expose LDU builtins

2023-03-14 Thread Jakub Chlanda via Phabricator via cfe-commits
jchlanda marked an inline comment as done.
jchlanda added a comment.

@tra is there anything else I should do for this patch? Thank you.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D145238/new/

https://reviews.llvm.org/D145238

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D145238: [NVPTX] Expose LDU builtins

2023-03-09 Thread Jakub Chlanda via Phabricator via cfe-commits
jchlanda marked 5 inline comments as done.
jchlanda added inline comments.



Comment at: clang/lib/CodeGen/CGBuiltin.cpp:18267-18271
+auto HalfSupport = HasHalfSupport(BuiltinID);
+if (!HalfSupport.first) {
+  CGM.Error(E->getExprLoc(),
+HalfSupport.second.append(" requires native half type 
support.")
+.c_str());

tra wrote:
> jchlanda wrote:
> > tra wrote:
> > > I think we can simplify it all further.
> > > 
> > > ```
> > > auto HasHalfSupport = [&](unsigned BuiltinID) {
> > > auto  = getContext();
> > > return Context.getLangOpts().NativeHalfType || 
> > > !Context.getTargetInfo().useFP16ConversionIntrinsics();
> > > }
> > > ...
> > > 
> > > if (!HasHalfSupport(BuiltinID)) {
> > >   CGM.Error(E->getExprLoc(),   
> > > getContext().BuiltinInfo.getName(BuiltinID) + " requires native half type 
> > > support.");
> > > 
> > > ```
> > Done, although we need a string append there, StringRef and Twine would not 
> > work.
> We don't really need `append()`.
> `CGM.Error(E->getExprLoc(),   
> getContext().BuiltinInfo.getName(BuiltinID).str() + " requires native half 
> type support.");` works.
> 
Done.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D145238/new/

https://reviews.llvm.org/D145238

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D145238: [NVPTX] Expose LDU builtins

2023-03-09 Thread Jakub Chlanda via Phabricator via cfe-commits
jchlanda updated this revision to Diff 503644.
jchlanda added a comment.

`append` -> `+`


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D145238/new/

https://reviews.llvm.org/D145238

Files:
  clang/include/clang/Basic/BuiltinsNVPTX.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-nvptx-native-half-type-err.c
  clang/test/CodeGen/builtins-nvptx-native-half-type.c
  clang/test/CodeGen/builtins-nvptx.c
  llvm/test/CodeGen/NVPTX/ldu-ldg.ll

Index: llvm/test/CodeGen/NVPTX/ldu-ldg.ll
===
--- llvm/test/CodeGen/NVPTX/ldu-ldg.ll
+++ llvm/test/CodeGen/NVPTX/ldu-ldg.ll
@@ -3,7 +3,13 @@
 
 
 declare i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align)
 declare i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align)
+declare <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align)
 
 declare i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
 declare i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align)
@@ -16,70 +22,112 @@
 
 ; CHECK: test_ldu_i8
 define i8 @test_ldu_i8(ptr addrspace(1) %ptr) {
-; ldu.global.u8
+  ; CHECK: ldu.global.u8
   %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
   ret i8 %val
 }
 
+; CHECK: test_ldu_i16
+define i16 @test_ldu_i16(ptr addrspace(1) %ptr) {
+  ; CHECK: ldu.global.u16
+  %val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
+  ret i16 %val
+}
+
 ; CHECK: test_ldu_i32
 define i32 @test_ldu_i32(ptr addrspace(1) %ptr) {
-; ldu.global.u32
+  ; CHECK: ldu.global.u32
   %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
   ret i32 %val
 }
 
+; CHECK: test_ldu_i64
+define i64 @test_ldu_i64(ptr addrspace(1) %ptr) {
+  ; CHECK: ldu.global.u64
+  %val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret i64 %val
+}
+
+; CHECK: test_ldu_f32
+define float @test_ldu_f32(ptr addrspace(1) %ptr) {
+  ; CHECK: ldu.global.f32
+  %val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
+  ret float %val
+}
+
+; CHECK: test_ldu_f64
+define double @test_ldu_f64(ptr addrspace(1) %ptr) {
+  ; CHECK: ldu.global.f64
+  %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret double %val
+}
+
+; CHECK: test_ldu_f16
+define half @test_ldu_f16(ptr addrspace(1) %ptr) {
+  ; CHECK: ldu.global.b16
+  %val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
+  ret half %val
+}
+
+; CHECK: test_ldu_v2f16
+define <2 x half> @test_ldu_v2f16(ptr addrspace(1) %ptr) {
+  ; CHECK: ldu.global.b32
+  %val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
+  ret <2 x half> %val
+}
+
 ; CHECK: test_ldg_i8
 define i8 @test_ldg_i8(ptr addrspace(1) %ptr) {
-; ld.global.nc.u8
+  ; CHECK: ld.global.nc.u8
   %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
   ret i8 %val
 }
 
 ; CHECK: test_ldg_i16
 define i16 @test_ldg_i16(ptr addrspace(1) %ptr) {
-; ld.global.nc.u16
-  %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 4)
+  ; CHECK: ld.global.nc.u16
+  %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
   ret i16 %val
 }
 
 ; CHECK: test_ldg_i32
 define i32 @test_ldg_i32(ptr addrspace(1) %ptr) {
-; ld.global.nc.u32
+  ; CHECK: ld.global.nc.u32
   %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
   ret i32 %val
 }
 
 ; CHECK: test_ldg_i64
 define i64 @test_ldg_i64(ptr addrspace(1) %ptr) {
-; ld.global.nc.u64
+  ; CHECK: ld.global.nc.u64
   %val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
   ret i64 %val
 }
 
 ; CHECK: test_ldg_f32
 define float @test_ldg_f32(ptr addrspace(1) %ptr) {
-; ld.global.nc.u64
+  ; CHECK: ld.global.nc.f32
   %val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
   ret float %val
 }
 
 ; CHECK: test_ldg_f64
 define double @test_ldg_f64(ptr addrspace(1) %ptr) {
-; ld.global.nc.u64
+  ; CHECK: ld.global.nc.f64
   %val = tail call double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
   ret double %val
 }
 
 ; CHECK: test_ldg_f16
 define half @test_ldg_f16(ptr addrspace(1) %ptr) {
-; ld.global.nc.b16
-  %val = tail call half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 4)
+  ; CHECK: ld.global.nc.b16
+  %val = tail call half 

[PATCH] D145238: [NVPTX] Expose LDU builtins

2023-03-08 Thread Artem Belevich via Phabricator via cfe-commits
tra added inline comments.



Comment at: clang/lib/CodeGen/CGBuiltin.cpp:18267-18271
+auto HalfSupport = HasHalfSupport(BuiltinID);
+if (!HalfSupport.first) {
+  CGM.Error(E->getExprLoc(),
+HalfSupport.second.append(" requires native half type 
support.")
+.c_str());

jchlanda wrote:
> tra wrote:
> > I think we can simplify it all further.
> > 
> > ```
> > auto HasHalfSupport = [&](unsigned BuiltinID) {
> > auto  = getContext();
> > return Context.getLangOpts().NativeHalfType || 
> > !Context.getTargetInfo().useFP16ConversionIntrinsics();
> > }
> > ...
> > 
> > if (!HasHalfSupport(BuiltinID)) {
> >   CGM.Error(E->getExprLoc(),   
> > getContext().BuiltinInfo.getName(BuiltinID) + " requires native half type 
> > support.");
> > 
> > ```
> Done, although we need a string append there, StringRef and Twine would not 
> work.
We don't really need `append()`.
`CGM.Error(E->getExprLoc(),   getContext().BuiltinInfo.getName(BuiltinID).str() 
+ " requires native half type support.");` works.



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D145238/new/

https://reviews.llvm.org/D145238

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D145238: [NVPTX] Expose LDU builtins

2023-03-07 Thread Jakub Chlanda via Phabricator via cfe-commits
jchlanda added inline comments.



Comment at: clang/lib/CodeGen/CGBuiltin.cpp:18267-18271
+auto HalfSupport = HasHalfSupport(BuiltinID);
+if (!HalfSupport.first) {
+  CGM.Error(E->getExprLoc(),
+HalfSupport.second.append(" requires native half type 
support.")
+.c_str());

tra wrote:
> I think we can simplify it all further.
> 
> ```
> auto HasHalfSupport = [&](unsigned BuiltinID) {
> auto  = getContext();
> return Context.getLangOpts().NativeHalfType || 
> !Context.getTargetInfo().useFP16ConversionIntrinsics();
> }
> ...
> 
> if (!HasHalfSupport(BuiltinID)) {
>   CGM.Error(E->getExprLoc(),   
> getContext().BuiltinInfo.getName(BuiltinID) + " requires native half type 
> support.");
> 
> ```
Done, although we need a string append there, StringRef and Twine would not 
work.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D145238/new/

https://reviews.llvm.org/D145238

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D145238: [NVPTX] Expose LDU builtins

2023-03-07 Thread Jakub Chlanda via Phabricator via cfe-commits
jchlanda updated this revision to Diff 503242.
jchlanda added a comment.

Simplify the check for half tys support.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D145238/new/

https://reviews.llvm.org/D145238

Files:
  clang/include/clang/Basic/BuiltinsNVPTX.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-nvptx-native-half-type-err.c
  clang/test/CodeGen/builtins-nvptx-native-half-type.c
  clang/test/CodeGen/builtins-nvptx.c
  llvm/test/CodeGen/NVPTX/ldu-ldg.ll

Index: llvm/test/CodeGen/NVPTX/ldu-ldg.ll
===
--- llvm/test/CodeGen/NVPTX/ldu-ldg.ll
+++ llvm/test/CodeGen/NVPTX/ldu-ldg.ll
@@ -3,7 +3,13 @@
 
 
 declare i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align)
 declare i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align)
+declare <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align)
 
 declare i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
 declare i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align)
@@ -16,70 +22,112 @@
 
 ; CHECK: test_ldu_i8
 define i8 @test_ldu_i8(ptr addrspace(1) %ptr) {
-; ldu.global.u8
+  ; CHECK: ldu.global.u8
   %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
   ret i8 %val
 }
 
+; CHECK: test_ldu_i16
+define i16 @test_ldu_i16(ptr addrspace(1) %ptr) {
+  ; CHECK: ldu.global.u16
+  %val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
+  ret i16 %val
+}
+
 ; CHECK: test_ldu_i32
 define i32 @test_ldu_i32(ptr addrspace(1) %ptr) {
-; ldu.global.u32
+  ; CHECK: ldu.global.u32
   %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
   ret i32 %val
 }
 
+; CHECK: test_ldu_i64
+define i64 @test_ldu_i64(ptr addrspace(1) %ptr) {
+  ; CHECK: ldu.global.u64
+  %val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret i64 %val
+}
+
+; CHECK: test_ldu_f32
+define float @test_ldu_f32(ptr addrspace(1) %ptr) {
+  ; CHECK: ldu.global.f32
+  %val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
+  ret float %val
+}
+
+; CHECK: test_ldu_f64
+define double @test_ldu_f64(ptr addrspace(1) %ptr) {
+  ; CHECK: ldu.global.f64
+  %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret double %val
+}
+
+; CHECK: test_ldu_f16
+define half @test_ldu_f16(ptr addrspace(1) %ptr) {
+  ; CHECK: ldu.global.b16
+  %val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
+  ret half %val
+}
+
+; CHECK: test_ldu_v2f16
+define <2 x half> @test_ldu_v2f16(ptr addrspace(1) %ptr) {
+  ; CHECK: ldu.global.b32
+  %val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
+  ret <2 x half> %val
+}
+
 ; CHECK: test_ldg_i8
 define i8 @test_ldg_i8(ptr addrspace(1) %ptr) {
-; ld.global.nc.u8
+  ; CHECK: ld.global.nc.u8
   %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
   ret i8 %val
 }
 
 ; CHECK: test_ldg_i16
 define i16 @test_ldg_i16(ptr addrspace(1) %ptr) {
-; ld.global.nc.u16
-  %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 4)
+  ; CHECK: ld.global.nc.u16
+  %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
   ret i16 %val
 }
 
 ; CHECK: test_ldg_i32
 define i32 @test_ldg_i32(ptr addrspace(1) %ptr) {
-; ld.global.nc.u32
+  ; CHECK: ld.global.nc.u32
   %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
   ret i32 %val
 }
 
 ; CHECK: test_ldg_i64
 define i64 @test_ldg_i64(ptr addrspace(1) %ptr) {
-; ld.global.nc.u64
+  ; CHECK: ld.global.nc.u64
   %val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
   ret i64 %val
 }
 
 ; CHECK: test_ldg_f32
 define float @test_ldg_f32(ptr addrspace(1) %ptr) {
-; ld.global.nc.u64
+  ; CHECK: ld.global.nc.f32
   %val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
   ret float %val
 }
 
 ; CHECK: test_ldg_f64
 define double @test_ldg_f64(ptr addrspace(1) %ptr) {
-; ld.global.nc.u64
+  ; CHECK: ld.global.nc.f64
   %val = tail call double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
   ret double %val
 }
 
 ; CHECK: test_ldg_f16
 define half @test_ldg_f16(ptr addrspace(1) %ptr) {
-; ld.global.nc.b16
-  %val = tail call half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 4)
+  ; CHECK: ld.global.nc.b16
+  %val = tail call half 

[PATCH] D145238: [NVPTX] Expose LDU builtins

2023-03-07 Thread Artem Belevich via Phabricator via cfe-commits
tra added inline comments.



Comment at: clang/lib/CodeGen/CGBuiltin.cpp:18267-18271
+auto HalfSupport = HasHalfSupport(BuiltinID);
+if (!HalfSupport.first) {
+  CGM.Error(E->getExprLoc(),
+HalfSupport.second.append(" requires native half type 
support.")
+.c_str());

I think we can simplify it all further.

```
auto HasHalfSupport = [&](unsigned BuiltinID) {
auto  = getContext();
return Context.getLangOpts().NativeHalfType || 
!Context.getTargetInfo().useFP16ConversionIntrinsics();
}
...

if (!HasHalfSupport(BuiltinID)) {
  CGM.Error(E->getExprLoc(),   getContext().BuiltinInfo.getName(BuiltinID) 
+ " requires native half type support.");

```


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D145238/new/

https://reviews.llvm.org/D145238

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D145238: [NVPTX] Expose LDU builtins

2023-03-07 Thread Jakub Chlanda via Phabricator via cfe-commits
jchlanda added inline comments.



Comment at: clang/lib/CodeGen/CGBuiltin.cpp:18116
+case NVPTX::BI__nvvm_ldu_h:
+  BuiltinName = "__nvvm_ldu_h";
+  break;

tra wrote:
> Can we use the standard `StringRef Name = 
> getContext().BuiltinInfo.getName(BuiltinID);` to figure out the builtin name?
Ha, had a feeling it would exist, couldn't find it. Thanks.



Comment at: clang/lib/CodeGen/CGBuiltin.cpp:18261-18263
+  std::string ErrMsg{HalfSupport.second};
+  CGM.Error(E->getExprLoc(),
+ErrMsg.append(" requires native half type support.").c_str());

tra wrote:
> Nit: this would be a bit more readable:
> ```
> std::string BuiltinName{HalfSupport.second};
> CGM.Error(E->getExprLoc(),  BuiltinName + " requires native half type 
> support.")` 
> ```
> You may also consider changing returned `BuiltinName` to be `std::string`, so 
> you would not need an explicit temp var. 
Done the `std::string` return, thanks.



Comment at: llvm/test/CodeGen/NVPTX/ldu-ldg.ll:60
+define double @test_ldu_f64(ptr addrspace(1) %ptr) {
+; ldu.global.u64
+  %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) 
%ptr, i32 8)

tra wrote:
> Hmm. I wonder why we end up with `u64` here and not `b64`. Not that it 
> matters in this case, but it is a discrepancy vs. `f16`.
That is copy/paste sloppiness on my part, sorry. I've updated the test to check 
generated PTX, not just the labels, and fixed the values.

It generates correct kinds of loads, based on the type, the only discrepancy is 
that it doesn't distinguish between signed and unsigned loads, always choosing 
the unsigned variant. I think that's by design, at 
[ISel](https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp#L1683)
 there is a check if the load needs to be extended and a correct `CVT` 
instruction will be added.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D145238/new/

https://reviews.llvm.org/D145238

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D145238: [NVPTX] Expose LDU builtins

2023-03-07 Thread Jakub Chlanda via Phabricator via cfe-commits
jchlanda updated this revision to Diff 502948.
jchlanda marked 3 inline comments as done.
jchlanda added a comment.

Address PR comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D145238/new/

https://reviews.llvm.org/D145238

Files:
  clang/include/clang/Basic/BuiltinsNVPTX.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-nvptx-native-half-type-err.c
  clang/test/CodeGen/builtins-nvptx-native-half-type.c
  clang/test/CodeGen/builtins-nvptx.c
  llvm/test/CodeGen/NVPTX/ldu-ldg.ll

Index: llvm/test/CodeGen/NVPTX/ldu-ldg.ll
===
--- llvm/test/CodeGen/NVPTX/ldu-ldg.ll
+++ llvm/test/CodeGen/NVPTX/ldu-ldg.ll
@@ -3,7 +3,13 @@
 
 
 declare i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align)
 declare i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align)
+declare <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align)
 
 declare i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
 declare i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align)
@@ -16,70 +22,112 @@
 
 ; CHECK: test_ldu_i8
 define i8 @test_ldu_i8(ptr addrspace(1) %ptr) {
-; ldu.global.u8
+  ; CHECK: ldu.global.u8
   %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
   ret i8 %val
 }
 
+; CHECK: test_ldu_i16
+define i16 @test_ldu_i16(ptr addrspace(1) %ptr) {
+  ; CHECK: ldu.global.u16
+  %val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
+  ret i16 %val
+}
+
 ; CHECK: test_ldu_i32
 define i32 @test_ldu_i32(ptr addrspace(1) %ptr) {
-; ldu.global.u32
+  ; CHECK: ldu.global.u32
   %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
   ret i32 %val
 }
 
+; CHECK: test_ldu_i64
+define i64 @test_ldu_i64(ptr addrspace(1) %ptr) {
+  ; CHECK: ldu.global.u64
+  %val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret i64 %val
+}
+
+; CHECK: test_ldu_f32
+define float @test_ldu_f32(ptr addrspace(1) %ptr) {
+  ; CHECK: ldu.global.f32
+  %val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
+  ret float %val
+}
+
+; CHECK: test_ldu_f64
+define double @test_ldu_f64(ptr addrspace(1) %ptr) {
+  ; CHECK: ldu.global.f64
+  %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret double %val
+}
+
+; CHECK: test_ldu_f16
+define half @test_ldu_f16(ptr addrspace(1) %ptr) {
+  ; CHECK: ldu.global.b16
+  %val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
+  ret half %val
+}
+
+; CHECK: test_ldu_v2f16
+define <2 x half> @test_ldu_v2f16(ptr addrspace(1) %ptr) {
+  ; CHECK: ldu.global.b32
+  %val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
+  ret <2 x half> %val
+}
+
 ; CHECK: test_ldg_i8
 define i8 @test_ldg_i8(ptr addrspace(1) %ptr) {
-; ld.global.nc.u8
+  ; CHECK: ld.global.nc.u8
   %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
   ret i8 %val
 }
 
 ; CHECK: test_ldg_i16
 define i16 @test_ldg_i16(ptr addrspace(1) %ptr) {
-; ld.global.nc.u16
-  %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 4)
+  ; CHECK: ld.global.nc.u16
+  %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
   ret i16 %val
 }
 
 ; CHECK: test_ldg_i32
 define i32 @test_ldg_i32(ptr addrspace(1) %ptr) {
-; ld.global.nc.u32
+  ; CHECK: ld.global.nc.u32
   %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
   ret i32 %val
 }
 
 ; CHECK: test_ldg_i64
 define i64 @test_ldg_i64(ptr addrspace(1) %ptr) {
-; ld.global.nc.u64
+  ; CHECK: ld.global.nc.u64
   %val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
   ret i64 %val
 }
 
 ; CHECK: test_ldg_f32
 define float @test_ldg_f32(ptr addrspace(1) %ptr) {
-; ld.global.nc.u64
+  ; CHECK: ld.global.nc.f32
   %val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
   ret float %val
 }
 
 ; CHECK: test_ldg_f64
 define double @test_ldg_f64(ptr addrspace(1) %ptr) {
-; ld.global.nc.u64
+  ; CHECK: ld.global.nc.f64
   %val = tail call double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
   ret double %val
 }
 
 ; CHECK: test_ldg_f16
 define half @test_ldg_f16(ptr addrspace(1) %ptr) {
-; ld.global.nc.b16
-  %val = tail call half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 4)
+  ; CHECK: ld.global.nc.b16
+  

[PATCH] D145238: [NVPTX] Expose LDU builtins

2023-03-03 Thread Artem Belevich via Phabricator via cfe-commits
tra added a comment.

Nice. Thank you!




Comment at: clang/lib/CodeGen/CGBuiltin.cpp:18104
 CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) {
-  auto MakeLdg = [&](unsigned IntrinsicID) {
+  auto HasHalfSupport = [&](unsigned BuiltinID) {
+auto  = getContext();

I'd add a comment describing a meaning of the fields in the returned pair.



Comment at: clang/lib/CodeGen/CGBuiltin.cpp:18116
+case NVPTX::BI__nvvm_ldu_h:
+  BuiltinName = "__nvvm_ldu_h";
+  break;

Can we use the standard `StringRef Name = 
getContext().BuiltinInfo.getName(BuiltinID);` to figure out the builtin name?



Comment at: clang/lib/CodeGen/CGBuiltin.cpp:18261-18263
+  std::string ErrMsg{HalfSupport.second};
+  CGM.Error(E->getExprLoc(),
+ErrMsg.append(" requires native half type support.").c_str());

Nit: this would be a bit more readable:
```
std::string BuiltinName{HalfSupport.second};
CGM.Error(E->getExprLoc(),  BuiltinName + " requires native half type 
support.")` 
```
You may also consider changing returned `BuiltinName` to be `std::string`, so 
you would not need an explicit temp var. 



Comment at: clang/test/CodeGen/builtins-nvptx-native-half-type-err.c:3
+//
+// RUN: not %clang_cc1 -DLDG -fsyntax-only -ffp-contract=off -triple 
nvptx-unknown-unknown -target-cpu \
+// RUN:   sm_75 -target-feature +ptx70 -fcuda-is-device -x cuda -emit-llvm -o 
- %s 2>&1 \

I think we could've done it all in one run if we were to do both ldg and ldu in 
one function.



Comment at: llvm/test/CodeGen/NVPTX/ldu-ldg.ll:33
+; ldu.global.u16
+  %val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, 
i32 4)
+  ret i16 %val

Should alignment be 2 ?



Comment at: llvm/test/CodeGen/NVPTX/ldu-ldg.ll:60
+define double @test_ldu_f64(ptr addrspace(1) %ptr) {
+; ldu.global.u64
+  %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) 
%ptr, i32 8)

Hmm. I wonder why we end up with `u64` here and not `b64`. Not that it matters 
in this case, but it is a discrepancy vs. `f16`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D145238/new/

https://reviews.llvm.org/D145238

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D145238: [NVPTX] Expose LDU builtins

2023-03-03 Thread Jakub Chlanda via Phabricator via cfe-commits
jchlanda created this revision.
jchlanda added a reviewer: tra.
Herald added subscribers: mattd, gchakrabarti, asavonic.
Herald added a project: All.
jchlanda requested review of this revision.
Herald added subscribers: llvm-commits, cfe-commits, jholewinski.
Herald added projects: clang, LLVM.

Also check if native half types are supported to give more descriptive error 
message, without it clang only reports incorrect intrinsic return type.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D145238

Files:
  clang/include/clang/Basic/BuiltinsNVPTX.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-nvptx-native-half-type-err.c
  clang/test/CodeGen/builtins-nvptx-native-half-type.c
  clang/test/CodeGen/builtins-nvptx.c
  llvm/test/CodeGen/NVPTX/ldu-ldg.ll

Index: llvm/test/CodeGen/NVPTX/ldu-ldg.ll
===
--- llvm/test/CodeGen/NVPTX/ldu-ldg.ll
+++ llvm/test/CodeGen/NVPTX/ldu-ldg.ll
@@ -3,7 +3,13 @@
 
 
 declare i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align)
 declare i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 %align)
+declare half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align)
+declare <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align)
 
 declare i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
 declare i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align)
@@ -21,6 +27,13 @@
   ret i8 %val
 }
 
+; CHECK: test_ldu_i16
+define i16 @test_ldu_i16(ptr addrspace(1) %ptr) {
+; ldu.global.u16
+  %val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 4)
+  ret i16 %val
+}
+
 ; CHECK: test_ldu_i32
 define i32 @test_ldu_i32(ptr addrspace(1) %ptr) {
 ; ldu.global.u32
@@ -28,6 +41,41 @@
   ret i32 %val
 }
 
+; CHECK: test_ldu_i64
+define i64 @test_ldu_i64(ptr addrspace(1) %ptr) {
+; ldu.global.u64
+  %val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret i64 %val
+}
+
+; CHECK: test_ldu_f32
+define float @test_ldu_f32(ptr addrspace(1) %ptr) {
+; ldu.global.u64
+  %val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
+  ret float %val
+}
+
+; CHECK: test_ldu_f64
+define double @test_ldu_f64(ptr addrspace(1) %ptr) {
+; ldu.global.u64
+  %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
+  ret double %val
+}
+
+; CHECK: test_ldu_f16
+define half @test_ldu_f16(ptr addrspace(1) %ptr) {
+; ldu.global.b16
+  %val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 4)
+  ret half %val
+}
+
+; CHECK: test_ldu_v2f16
+define <2 x half> @test_ldu_v2f16(ptr addrspace(1) %ptr) {
+; ldu.global.b32
+  %val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
+  ret <2 x half> %val
+}
+
 ; CHECK: test_ldg_i8
 define i8 @test_ldg_i8(ptr addrspace(1) %ptr) {
 ; ld.global.nc.u8
Index: clang/test/CodeGen/builtins-nvptx.c
===
--- clang/test/CodeGen/builtins-nvptx.c
+++ clang/test/CodeGen/builtins-nvptx.c
@@ -652,6 +652,97 @@
   __nvvm_ldg_d2((const double2 *)p);
 }
 
+// CHECK-LABEL: nvvm_ldu
+__device__ void nvvm_ldu(const void *p) {
+  // CHECK: call i8 @llvm.nvvm.ldu.global.i.i8.p0(ptr {{%[0-9]+}}, i32 1)
+  // CHECK: call i8 @llvm.nvvm.ldu.global.i.i8.p0(ptr {{%[0-9]+}}, i32 1)
+  __nvvm_ldu_c((const char *)p);
+  __nvvm_ldu_uc((const unsigned char *)p);
+
+  // CHECK: call i16 @llvm.nvvm.ldu.global.i.i16.p0(ptr {{%[0-9]+}}, i32 2)
+  // CHECK: call i16 @llvm.nvvm.ldu.global.i.i16.p0(ptr {{%[0-9]+}}, i32 2)
+  __nvvm_ldu_s((const short *)p);
+  __nvvm_ldu_us((const unsigned short *)p);
+
+  // CHECK: call i32 @llvm.nvvm.ldu.global.i.i32.p0(ptr {{%[0-9]+}}, i32 4)
+  // CHECK: call i32 @llvm.nvvm.ldu.global.i.i32.p0(ptr {{%[0-9]+}}, i32 4)
+  __nvvm_ldu_i((const int *)p);
+  __nvvm_ldu_ui((const unsigned int *)p);
+
+  // LP32: call i32 @llvm.nvvm.ldu.global.i.i32.p0(ptr {{%[0-9]+}}, i32 4)
+  // LP32: call i32 @llvm.nvvm.ldu.global.i.i32.p0(ptr {{%[0-9]+}}, i32 4)
+  // LP64: call i64 @llvm.nvvm.ldu.global.i.i64.p0(ptr {{%[0-9]+}}, i32 8)
+  // LP64: call i64 @llvm.nvvm.ldu.global.i.i64.p0(ptr {{%[0-9]+}}, i32 8)
+  __nvvm_ldu_l((const long *)p);
+  __nvvm_ldu_ul((const unsigned long *)p);
+
+  // CHECK: call float @llvm.nvvm.ldu.global.f.f32.p0(ptr {{%[0-9]+}}, i32 4)
+  __nvvm_ldu_f((const float *)p);
+  // CHECK: call double @llvm.nvvm.ldu.global.f.f64.p0(ptr {{%[0-9]+}}, i32 8)
+  __nvvm_ldu_d((const double *)p);
+
+  // CHECK: call <2 x i8>