[PATCH] D145238: [NVPTX] Expose LDU builtins
This revision was landed with ongoing or failed builds. This revision was automatically updated to reflect the committed changes. Closed by commit rG7258317bade0: [NVPTX] Expose LDU builtins (authored by jchlanda). Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D145238/new/ https://reviews.llvm.org/D145238 Files: clang/include/clang/Basic/BuiltinsNVPTX.def clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/builtins-nvptx-native-half-type-err.c clang/test/CodeGen/builtins-nvptx-native-half-type.c clang/test/CodeGen/builtins-nvptx.c llvm/test/CodeGen/NVPTX/ldu-ldg.ll Index: llvm/test/CodeGen/NVPTX/ldu-ldg.ll === --- llvm/test/CodeGen/NVPTX/ldu-ldg.ll +++ llvm/test/CodeGen/NVPTX/ldu-ldg.ll @@ -3,7 +3,13 @@ declare i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align) +declare i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align) declare i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align) +declare i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 %align) +declare float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 %align) +declare double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 %align) +declare half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align) +declare <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align) declare i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align) declare i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align) @@ -14,72 +20,114 @@ declare half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align) declare <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align) -; CHECK: test_ldu_i8 +; CHECK-LABEL: test_ldu_i8 define i8 @test_ldu_i8(ptr addrspace(1) %ptr) { -; ldu.global.u8 + ; CHECK: ldu.global.u8 %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4) ret i8 %val } -; CHECK: test_ldu_i32 +; 
CHECK-LABEL: test_ldu_i16 +define i16 @test_ldu_i16(ptr addrspace(1) %ptr) { + ; CHECK: ldu.global.u16 + %val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2) + ret i16 %val +} + +; CHECK-LABEL: test_ldu_i32 define i32 @test_ldu_i32(ptr addrspace(1) %ptr) { -; ldu.global.u32 + ; CHECK: ldu.global.u32 %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4) ret i32 %val } -; CHECK: test_ldg_i8 +; CHECK-LABEL: test_ldu_i64 +define i64 @test_ldu_i64(ptr addrspace(1) %ptr) { + ; CHECK: ldu.global.u64 + %val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8) + ret i64 %val +} + +; CHECK-LABEL: test_ldu_f32 +define float @test_ldu_f32(ptr addrspace(1) %ptr) { + ; CHECK: ldu.global.f32 + %val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4) + ret float %val +} + +; CHECK-LABEL: test_ldu_f64 +define double @test_ldu_f64(ptr addrspace(1) %ptr) { + ; CHECK: ldu.global.f64 + %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8) + ret double %val +} + +; CHECK-LABEL: test_ldu_f16 +define half @test_ldu_f16(ptr addrspace(1) %ptr) { + ; CHECK: ldu.global.b16 + %val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2) + ret half %val +} + +; CHECK-LABEL: test_ldu_v2f16 +define <2 x half> @test_ldu_v2f16(ptr addrspace(1) %ptr) { + ; CHECK: ldu.global.b32 + %val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4) + ret <2 x half> %val +} + +; CHECK-LABEL: test_ldg_i8 define i8 @test_ldg_i8(ptr addrspace(1) %ptr) { -; ld.global.nc.u8 + ; CHECK: ld.global.nc.u8 %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4) ret i8 %val } -; CHECK: test_ldg_i16 +; CHECK-LABEL: test_ldg_i16 define i16 @test_ldg_i16(ptr addrspace(1) %ptr) { -; ld.global.nc.u16 - %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 4) + ; CHECK: ld.global.nc.u16 + 
%val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2) ret i16 %val } -; CHECK: test_ldg_i32 +; CHECK-LABEL: test_ldg_i32 define i32 @test_ldg_i32(ptr addrspace(1) %ptr) { -; ld.global.nc.u32 + ; CHECK: ld.global.nc.u32 %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4) ret i32 %val } -; CHECK: test_ldg_i64 +; CHECK-LABEL: test_ldg_i64 define i64 @test_ldg_i64(ptr addrspace(1) %ptr) { -; ld.global.nc.u64 + ; CHECK: ld.global.nc.u64 %val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8) ret i64 %val } -; CHECK: test_ldg_f32 +; CHECK-LABEL: test_ldg_f32 define float @test_ldg_f32(ptr addrspace(1) %ptr) { -; ld.global.nc.u64 + ; CHECK: ld.global.nc.f32 %val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1)
[PATCH] D145238: [NVPTX] Expose LDU builtins
jchlanda updated this revision to Diff 505394. jchlanda added a comment. Use `CHECK-LABEL`. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D145238/new/ https://reviews.llvm.org/D145238 Files: clang/include/clang/Basic/BuiltinsNVPTX.def clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/builtins-nvptx-native-half-type-err.c clang/test/CodeGen/builtins-nvptx-native-half-type.c clang/test/CodeGen/builtins-nvptx.c llvm/test/CodeGen/NVPTX/ldu-ldg.ll Index: llvm/test/CodeGen/NVPTX/ldu-ldg.ll === --- llvm/test/CodeGen/NVPTX/ldu-ldg.ll +++ llvm/test/CodeGen/NVPTX/ldu-ldg.ll @@ -3,7 +3,13 @@ declare i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align) +declare i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align) declare i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align) +declare i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 %align) +declare float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 %align) +declare double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 %align) +declare half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align) +declare <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align) declare i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align) declare i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align) @@ -14,72 +20,114 @@ declare half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align) declare <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align) -; CHECK: test_ldu_i8 +; CHECK-LABEL: test_ldu_i8 define i8 @test_ldu_i8(ptr addrspace(1) %ptr) { -; ldu.global.u8 + ; CHECK: ldu.global.u8 %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4) ret i8 %val } -; CHECK: test_ldu_i32 +; CHECK-LABEL: test_ldu_i16 +define i16 @test_ldu_i16(ptr addrspace(1) %ptr) { + ; CHECK: ldu.global.u16 + %val = tail call i16 
@llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2) + ret i16 %val +} + +; CHECK-LABEL: test_ldu_i32 define i32 @test_ldu_i32(ptr addrspace(1) %ptr) { -; ldu.global.u32 + ; CHECK: ldu.global.u32 %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4) ret i32 %val } -; CHECK: test_ldg_i8 +; CHECK-LABEL: test_ldu_i64 +define i64 @test_ldu_i64(ptr addrspace(1) %ptr) { + ; CHECK: ldu.global.u64 + %val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8) + ret i64 %val +} + +; CHECK-LABEL: test_ldu_f32 +define float @test_ldu_f32(ptr addrspace(1) %ptr) { + ; CHECK: ldu.global.f32 + %val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4) + ret float %val +} + +; CHECK-LABEL: test_ldu_f64 +define double @test_ldu_f64(ptr addrspace(1) %ptr) { + ; CHECK: ldu.global.f64 + %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8) + ret double %val +} + +; CHECK-LABEL: test_ldu_f16 +define half @test_ldu_f16(ptr addrspace(1) %ptr) { + ; CHECK: ldu.global.b16 + %val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2) + ret half %val +} + +; CHECK-LABEL: test_ldu_v2f16 +define <2 x half> @test_ldu_v2f16(ptr addrspace(1) %ptr) { + ; CHECK: ldu.global.b32 + %val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4) + ret <2 x half> %val +} + +; CHECK-LABEL: test_ldg_i8 define i8 @test_ldg_i8(ptr addrspace(1) %ptr) { -; ld.global.nc.u8 + ; CHECK: ld.global.nc.u8 %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4) ret i8 %val } -; CHECK: test_ldg_i16 +; CHECK-LABEL: test_ldg_i16 define i16 @test_ldg_i16(ptr addrspace(1) %ptr) { -; ld.global.nc.u16 - %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 4) + ; CHECK: ld.global.nc.u16 + %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2) ret i16 %val } -; CHECK: test_ldg_i32 +; 
CHECK-LABEL: test_ldg_i32 define i32 @test_ldg_i32(ptr addrspace(1) %ptr) { -; ld.global.nc.u32 + ; CHECK: ld.global.nc.u32 %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4) ret i32 %val } -; CHECK: test_ldg_i64 +; CHECK-LABEL: test_ldg_i64 define i64 @test_ldg_i64(ptr addrspace(1) %ptr) { -; ld.global.nc.u64 + ; CHECK: ld.global.nc.u64 %val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8) ret i64 %val } -; CHECK: test_ldg_f32 +; CHECK-LABEL: test_ldg_f32 define float @test_ldg_f32(ptr addrspace(1) %ptr) { -; ld.global.nc.u64 + ; CHECK: ld.global.nc.f32 %val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4) ret float %val } -; CHECK: test_ldg_f64 +; CHECK-LABEL: test_ldg_f64 define double @test_ldg_f64(ptr
[PATCH] D145238: [NVPTX] Expose LDU builtins
tra accepted this revision. tra added a comment. This revision is now accepted and ready to land. LGTM with a test nit. Comment at: llvm/test/CodeGen/NVPTX/ldu-ldg.ll:30 +; CHECK: test_ldu_i16 +define i16 @test_ldu_i16(ptr addrspace(1) %ptr) { Nit. Function names are usually checked with `CHECK-LABEL`. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D145238/new/ https://reviews.llvm.org/D145238 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D145238: [NVPTX] Expose LDU builtins
jchlanda marked an inline comment as done. jchlanda added a comment. @tra is there anything else I should do for this patch? Thank you. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D145238/new/ https://reviews.llvm.org/D145238 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D145238: [NVPTX] Expose LDU builtins
jchlanda marked 5 inline comments as done. jchlanda added inline comments. Comment at: clang/lib/CodeGen/CGBuiltin.cpp:18267-18271 +auto HalfSupport = HasHalfSupport(BuiltinID); +if (!HalfSupport.first) { + CGM.Error(E->getExprLoc(), +HalfSupport.second.append(" requires native half type support.") +.c_str()); tra wrote: > jchlanda wrote: > > tra wrote: > > > I think we can simplify it all further. > > > > > > ``` > > > auto HasHalfSupport = [&](unsigned BuiltinID) { > > > auto &Context = getContext(); > > > return Context.getLangOpts().NativeHalfType || > > > !Context.getTargetInfo().useFP16ConversionIntrinsics(); > > > } > > > ... > > > > > > if (!HasHalfSupport(BuiltinID)) { > > > CGM.Error(E->getExprLoc(), > > > getContext().BuiltinInfo.getName(BuiltinID) + " requires native half type > > > support."); > > > > > > ``` > > Done, although we need a string append there, StringRef and Twine would not > > work. > We don't really need `append()`. > `CGM.Error(E->getExprLoc(), > getContext().BuiltinInfo.getName(BuiltinID).str() + " requires native half > type support.");` works. > Done. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D145238/new/ https://reviews.llvm.org/D145238 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D145238: [NVPTX] Expose LDU builtins
jchlanda updated this revision to Diff 503644. jchlanda added a comment. `append` -> `+` Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D145238/new/ https://reviews.llvm.org/D145238 Files: clang/include/clang/Basic/BuiltinsNVPTX.def clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/builtins-nvptx-native-half-type-err.c clang/test/CodeGen/builtins-nvptx-native-half-type.c clang/test/CodeGen/builtins-nvptx.c llvm/test/CodeGen/NVPTX/ldu-ldg.ll Index: llvm/test/CodeGen/NVPTX/ldu-ldg.ll === --- llvm/test/CodeGen/NVPTX/ldu-ldg.ll +++ llvm/test/CodeGen/NVPTX/ldu-ldg.ll @@ -3,7 +3,13 @@ declare i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align) +declare i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align) declare i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align) +declare i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 %align) +declare float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 %align) +declare double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 %align) +declare half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align) +declare <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align) declare i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align) declare i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align) @@ -16,70 +22,112 @@ ; CHECK: test_ldu_i8 define i8 @test_ldu_i8(ptr addrspace(1) %ptr) { -; ldu.global.u8 + ; CHECK: ldu.global.u8 %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4) ret i8 %val } +; CHECK: test_ldu_i16 +define i16 @test_ldu_i16(ptr addrspace(1) %ptr) { + ; CHECK: ldu.global.u16 + %val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2) + ret i16 %val +} + ; CHECK: test_ldu_i32 define i32 @test_ldu_i32(ptr addrspace(1) %ptr) { -; ldu.global.u32 + ; CHECK: ldu.global.u32 %val = tail call i32 
@llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4) ret i32 %val } +; CHECK: test_ldu_i64 +define i64 @test_ldu_i64(ptr addrspace(1) %ptr) { + ; CHECK: ldu.global.u64 + %val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8) + ret i64 %val +} + +; CHECK: test_ldu_f32 +define float @test_ldu_f32(ptr addrspace(1) %ptr) { + ; CHECK: ldu.global.f32 + %val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4) + ret float %val +} + +; CHECK: test_ldu_f64 +define double @test_ldu_f64(ptr addrspace(1) %ptr) { + ; CHECK: ldu.global.f64 + %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8) + ret double %val +} + +; CHECK: test_ldu_f16 +define half @test_ldu_f16(ptr addrspace(1) %ptr) { + ; CHECK: ldu.global.b16 + %val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2) + ret half %val +} + +; CHECK: test_ldu_v2f16 +define <2 x half> @test_ldu_v2f16(ptr addrspace(1) %ptr) { + ; CHECK: ldu.global.b32 + %val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4) + ret <2 x half> %val +} + ; CHECK: test_ldg_i8 define i8 @test_ldg_i8(ptr addrspace(1) %ptr) { -; ld.global.nc.u8 + ; CHECK: ld.global.nc.u8 %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4) ret i8 %val } ; CHECK: test_ldg_i16 define i16 @test_ldg_i16(ptr addrspace(1) %ptr) { -; ld.global.nc.u16 - %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 4) + ; CHECK: ld.global.nc.u16 + %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2) ret i16 %val } ; CHECK: test_ldg_i32 define i32 @test_ldg_i32(ptr addrspace(1) %ptr) { -; ld.global.nc.u32 + ; CHECK: ld.global.nc.u32 %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4) ret i32 %val } ; CHECK: test_ldg_i64 define i64 @test_ldg_i64(ptr addrspace(1) %ptr) { -; ld.global.nc.u64 + ; CHECK: ld.global.nc.u64 %val = 
tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8) ret i64 %val } ; CHECK: test_ldg_f32 define float @test_ldg_f32(ptr addrspace(1) %ptr) { -; ld.global.nc.u64 + ; CHECK: ld.global.nc.f32 %val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4) ret float %val } ; CHECK: test_ldg_f64 define double @test_ldg_f64(ptr addrspace(1) %ptr) { -; ld.global.nc.u64 + ; CHECK: ld.global.nc.f64 %val = tail call double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8) ret double %val } ; CHECK: test_ldg_f16 define half @test_ldg_f16(ptr addrspace(1) %ptr) { -; ld.global.nc.b16 - %val = tail call half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 4) + ; CHECK: ld.global.nc.b16 + %val = tail call half
[PATCH] D145238: [NVPTX] Expose LDU builtins
tra added inline comments. Comment at: clang/lib/CodeGen/CGBuiltin.cpp:18267-18271 +auto HalfSupport = HasHalfSupport(BuiltinID); +if (!HalfSupport.first) { + CGM.Error(E->getExprLoc(), +HalfSupport.second.append(" requires native half type support.") +.c_str()); jchlanda wrote: > tra wrote: > > I think we can simplify it all further. > > > > ``` > > auto HasHalfSupport = [&](unsigned BuiltinID) { > > auto &Context = getContext(); > > return Context.getLangOpts().NativeHalfType || > > !Context.getTargetInfo().useFP16ConversionIntrinsics(); > > } > > ... > > > > if (!HasHalfSupport(BuiltinID)) { > > CGM.Error(E->getExprLoc(), > > getContext().BuiltinInfo.getName(BuiltinID) + " requires native half type > > support."); > > > > ``` > Done, although we need a string append there, StringRef and Twine would not > work. We don't really need `append()`. `CGM.Error(E->getExprLoc(), getContext().BuiltinInfo.getName(BuiltinID).str() + " requires native half type support.");` works. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D145238/new/ https://reviews.llvm.org/D145238 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D145238: [NVPTX] Expose LDU builtins
jchlanda added inline comments. Comment at: clang/lib/CodeGen/CGBuiltin.cpp:18267-18271 +auto HalfSupport = HasHalfSupport(BuiltinID); +if (!HalfSupport.first) { + CGM.Error(E->getExprLoc(), +HalfSupport.second.append(" requires native half type support.") +.c_str()); tra wrote: > I think we can simplify it all further. > > ``` > auto HasHalfSupport = [&](unsigned BuiltinID) { > auto &Context = getContext(); > return Context.getLangOpts().NativeHalfType || > !Context.getTargetInfo().useFP16ConversionIntrinsics(); > } > ... > > if (!HasHalfSupport(BuiltinID)) { > CGM.Error(E->getExprLoc(), > getContext().BuiltinInfo.getName(BuiltinID) + " requires native half type > support."); > > ``` Done, although we need a string append there, StringRef and Twine would not work. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D145238/new/ https://reviews.llvm.org/D145238 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D145238: [NVPTX] Expose LDU builtins
jchlanda updated this revision to Diff 503242. jchlanda added a comment. Simplify the check for half tys support. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D145238/new/ https://reviews.llvm.org/D145238 Files: clang/include/clang/Basic/BuiltinsNVPTX.def clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/builtins-nvptx-native-half-type-err.c clang/test/CodeGen/builtins-nvptx-native-half-type.c clang/test/CodeGen/builtins-nvptx.c llvm/test/CodeGen/NVPTX/ldu-ldg.ll Index: llvm/test/CodeGen/NVPTX/ldu-ldg.ll === --- llvm/test/CodeGen/NVPTX/ldu-ldg.ll +++ llvm/test/CodeGen/NVPTX/ldu-ldg.ll @@ -3,7 +3,13 @@ declare i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align) +declare i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align) declare i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align) +declare i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 %align) +declare float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 %align) +declare double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 %align) +declare half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align) +declare <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align) declare i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align) declare i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align) @@ -16,70 +22,112 @@ ; CHECK: test_ldu_i8 define i8 @test_ldu_i8(ptr addrspace(1) %ptr) { -; ldu.global.u8 + ; CHECK: ldu.global.u8 %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4) ret i8 %val } +; CHECK: test_ldu_i16 +define i16 @test_ldu_i16(ptr addrspace(1) %ptr) { + ; CHECK: ldu.global.u16 + %val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2) + ret i16 %val +} + ; CHECK: test_ldu_i32 define i32 @test_ldu_i32(ptr addrspace(1) %ptr) { -; ldu.global.u32 + ; CHECK: ldu.global.u32 %val 
= tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4) ret i32 %val } +; CHECK: test_ldu_i64 +define i64 @test_ldu_i64(ptr addrspace(1) %ptr) { + ; CHECK: ldu.global.u64 + %val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8) + ret i64 %val +} + +; CHECK: test_ldu_f32 +define float @test_ldu_f32(ptr addrspace(1) %ptr) { + ; CHECK: ldu.global.f32 + %val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4) + ret float %val +} + +; CHECK: test_ldu_f64 +define double @test_ldu_f64(ptr addrspace(1) %ptr) { + ; CHECK: ldu.global.f64 + %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8) + ret double %val +} + +; CHECK: test_ldu_f16 +define half @test_ldu_f16(ptr addrspace(1) %ptr) { + ; CHECK: ldu.global.b16 + %val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2) + ret half %val +} + +; CHECK: test_ldu_v2f16 +define <2 x half> @test_ldu_v2f16(ptr addrspace(1) %ptr) { + ; CHECK: ldu.global.b32 + %val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4) + ret <2 x half> %val +} + ; CHECK: test_ldg_i8 define i8 @test_ldg_i8(ptr addrspace(1) %ptr) { -; ld.global.nc.u8 + ; CHECK: ld.global.nc.u8 %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4) ret i8 %val } ; CHECK: test_ldg_i16 define i16 @test_ldg_i16(ptr addrspace(1) %ptr) { -; ld.global.nc.u16 - %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 4) + ; CHECK: ld.global.nc.u16 + %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2) ret i16 %val } ; CHECK: test_ldg_i32 define i32 @test_ldg_i32(ptr addrspace(1) %ptr) { -; ld.global.nc.u32 + ; CHECK: ld.global.nc.u32 %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4) ret i32 %val } ; CHECK: test_ldg_i64 define i64 @test_ldg_i64(ptr addrspace(1) %ptr) { -; ld.global.nc.u64 + ; CHECK: 
ld.global.nc.u64 %val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8) ret i64 %val } ; CHECK: test_ldg_f32 define float @test_ldg_f32(ptr addrspace(1) %ptr) { -; ld.global.nc.u64 + ; CHECK: ld.global.nc.f32 %val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4) ret float %val } ; CHECK: test_ldg_f64 define double @test_ldg_f64(ptr addrspace(1) %ptr) { -; ld.global.nc.u64 + ; CHECK: ld.global.nc.f64 %val = tail call double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8) ret double %val } ; CHECK: test_ldg_f16 define half @test_ldg_f16(ptr addrspace(1) %ptr) { -; ld.global.nc.b16 - %val = tail call half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 4) + ; CHECK: ld.global.nc.b16 + %val = tail call half
[PATCH] D145238: [NVPTX] Expose LDU builtins
tra added inline comments. Comment at: clang/lib/CodeGen/CGBuiltin.cpp:18267-18271 +auto HalfSupport = HasHalfSupport(BuiltinID); +if (!HalfSupport.first) { + CGM.Error(E->getExprLoc(), +HalfSupport.second.append(" requires native half type support.") +.c_str()); I think we can simplify it all further. ``` auto HasHalfSupport = [&](unsigned BuiltinID) { auto &Context = getContext(); return Context.getLangOpts().NativeHalfType || !Context.getTargetInfo().useFP16ConversionIntrinsics(); } ... if (!HasHalfSupport(BuiltinID)) { CGM.Error(E->getExprLoc(), getContext().BuiltinInfo.getName(BuiltinID) + " requires native half type support."); ``` Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D145238/new/ https://reviews.llvm.org/D145238 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D145238: [NVPTX] Expose LDU builtins
jchlanda added inline comments. Comment at: clang/lib/CodeGen/CGBuiltin.cpp:18116 +case NVPTX::BI__nvvm_ldu_h: + BuiltinName = "__nvvm_ldu_h"; + break; tra wrote: > Can we use the standard `StringRef Name = > getContext().BuiltinInfo.getName(BuiltinID);` to figure out the builtin name? Ha, had a feeling it would exist, couldn't find it. Thanks. Comment at: clang/lib/CodeGen/CGBuiltin.cpp:18261-18263 + std::string ErrMsg{HalfSupport.second}; + CGM.Error(E->getExprLoc(), +ErrMsg.append(" requires native half type support.").c_str()); tra wrote: > Nit: this would be a bit more readable: > ``` > std::string BuiltinName{HalfSupport.second}; > CGM.Error(E->getExprLoc(), BuiltinName + " requires native half type > support.")` > ``` > You may also consider changing returned `BuiltinName` to be `std::string`, so > you would not need an explicit temp var. Done the `std::string` return, thanks. Comment at: llvm/test/CodeGen/NVPTX/ldu-ldg.ll:60 +define double @test_ldu_f64(ptr addrspace(1) %ptr) { +; ldu.global.u64 + %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8) tra wrote: > Hmm. I wonder why we end up with `u64` here and not `b64`. Not that it > matters in this case, but it is a discrepancy vs. `f16`. That is copy/paste sloppiness on my part, sorry. I've updated the test to check generated PTX, not just the labels, and fixed the values. It generates correct kinds of loads, based on the type, the only discrepancy is that it doesn't distinguish between signed and unsigned loads, always choosing the unsigned variant. I think that's by design, at [ISel](https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp#L1683) there is a check if the load needs to be extended and a correct `CVT` instruction will be added. 
Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D145238/new/ https://reviews.llvm.org/D145238 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D145238: [NVPTX] Expose LDU builtins
jchlanda updated this revision to Diff 502948. jchlanda marked 3 inline comments as done. jchlanda added a comment. Address PR comments Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D145238/new/ https://reviews.llvm.org/D145238 Files: clang/include/clang/Basic/BuiltinsNVPTX.def clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/builtins-nvptx-native-half-type-err.c clang/test/CodeGen/builtins-nvptx-native-half-type.c clang/test/CodeGen/builtins-nvptx.c llvm/test/CodeGen/NVPTX/ldu-ldg.ll Index: llvm/test/CodeGen/NVPTX/ldu-ldg.ll === --- llvm/test/CodeGen/NVPTX/ldu-ldg.ll +++ llvm/test/CodeGen/NVPTX/ldu-ldg.ll @@ -3,7 +3,13 @@ declare i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align) +declare i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align) declare i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align) +declare i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 %align) +declare float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 %align) +declare double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 %align) +declare half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align) +declare <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align) declare i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align) declare i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align) @@ -16,70 +22,112 @@ ; CHECK: test_ldu_i8 define i8 @test_ldu_i8(ptr addrspace(1) %ptr) { -; ldu.global.u8 + ; CHECK: ldu.global.u8 %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4) ret i8 %val } +; CHECK: test_ldu_i16 +define i16 @test_ldu_i16(ptr addrspace(1) %ptr) { + ; CHECK: ldu.global.u16 + %val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2) + ret i16 %val +} + ; CHECK: test_ldu_i32 define i32 @test_ldu_i32(ptr addrspace(1) %ptr) { -; ldu.global.u32 + ; 
CHECK: ldu.global.u32 %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4) ret i32 %val } +; CHECK: test_ldu_i64 +define i64 @test_ldu_i64(ptr addrspace(1) %ptr) { + ; CHECK: ldu.global.u64 + %val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8) + ret i64 %val +} + +; CHECK: test_ldu_f32 +define float @test_ldu_f32(ptr addrspace(1) %ptr) { + ; CHECK: ldu.global.f32 + %val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4) + ret float %val +} + +; CHECK: test_ldu_f64 +define double @test_ldu_f64(ptr addrspace(1) %ptr) { + ; CHECK: ldu.global.f64 + %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8) + ret double %val +} + +; CHECK: test_ldu_f16 +define half @test_ldu_f16(ptr addrspace(1) %ptr) { + ; CHECK: ldu.global.b16 + %val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2) + ret half %val +} + +; CHECK: test_ldu_v2f16 +define <2 x half> @test_ldu_v2f16(ptr addrspace(1) %ptr) { + ; CHECK: ldu.global.b32 + %val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4) + ret <2 x half> %val +} + ; CHECK: test_ldg_i8 define i8 @test_ldg_i8(ptr addrspace(1) %ptr) { -; ld.global.nc.u8 + ; CHECK: ld.global.nc.u8 %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4) ret i8 %val } ; CHECK: test_ldg_i16 define i16 @test_ldg_i16(ptr addrspace(1) %ptr) { -; ld.global.nc.u16 - %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 4) + ; CHECK: ld.global.nc.u16 + %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2) ret i16 %val } ; CHECK: test_ldg_i32 define i32 @test_ldg_i32(ptr addrspace(1) %ptr) { -; ld.global.nc.u32 + ; CHECK: ld.global.nc.u32 %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4) ret i32 %val } ; CHECK: test_ldg_i64 define i64 @test_ldg_i64(ptr addrspace(1) %ptr) { -; 
ld.global.nc.u64 + ; CHECK: ld.global.nc.u64 %val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8) ret i64 %val } ; CHECK: test_ldg_f32 define float @test_ldg_f32(ptr addrspace(1) %ptr) { -; ld.global.nc.u64 + ; CHECK: ld.global.nc.f32 %val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4) ret float %val } ; CHECK: test_ldg_f64 define double @test_ldg_f64(ptr addrspace(1) %ptr) { -; ld.global.nc.u64 + ; CHECK: ld.global.nc.f64 %val = tail call double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8) ret double %val } ; CHECK: test_ldg_f16 define half @test_ldg_f16(ptr addrspace(1) %ptr) { -; ld.global.nc.b16 - %val = tail call half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 4) + ; CHECK: ld.global.nc.b16 +
[PATCH] D145238: [NVPTX] Expose LDU builtins
tra added a comment. Nice. Thank you! Comment at: clang/lib/CodeGen/CGBuiltin.cpp:18104 CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { - auto MakeLdg = [&](unsigned IntrinsicID) { + auto HasHalfSupport = [&](unsigned BuiltinID) { +auto &Context = getContext(); I'd add a comment describing the meaning of the fields in the returned pair. Comment at: clang/lib/CodeGen/CGBuiltin.cpp:18116 +case NVPTX::BI__nvvm_ldu_h: + BuiltinName = "__nvvm_ldu_h"; + break; Can we use the standard `StringRef Name = getContext().BuiltinInfo.getName(BuiltinID);` to figure out the builtin name? Comment at: clang/lib/CodeGen/CGBuiltin.cpp:18261-18263 + std::string ErrMsg{HalfSupport.second}; + CGM.Error(E->getExprLoc(), +ErrMsg.append(" requires native half type support.").c_str()); Nit: this would be a bit more readable: ``` std::string BuiltinName{HalfSupport.second}; CGM.Error(E->getExprLoc(), BuiltinName + " requires native half type support."); ``` You may also consider changing returned `BuiltinName` to be `std::string`, so you would not need an explicit temp var. Comment at: clang/test/CodeGen/builtins-nvptx-native-half-type-err.c:3 +// +// RUN: not %clang_cc1 -DLDG -fsyntax-only -ffp-contract=off -triple nvptx-unknown-unknown -target-cpu \ +// RUN: sm_75 -target-feature +ptx70 -fcuda-is-device -x cuda -emit-llvm -o - %s 2>&1 \ I think we could've done it all in one run if we were to do both ldg and ldu in one function. Comment at: llvm/test/CodeGen/NVPTX/ldu-ldg.ll:33 +; ldu.global.u16 + %val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 4) + ret i16 %val Should alignment be 2? Comment at: llvm/test/CodeGen/NVPTX/ldu-ldg.ll:60 +define double @test_ldu_f64(ptr addrspace(1) %ptr) { +; ldu.global.u64 + %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8) Hmm. I wonder why we end up with `u64` here and not `b64`. Not that it matters in this case, but it is a discrepancy vs. `f16`. 
Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D145238/new/ https://reviews.llvm.org/D145238 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D145238: [NVPTX] Expose LDU builtins
jchlanda created this revision. jchlanda added a reviewer: tra. Herald added subscribers: mattd, gchakrabarti, asavonic. Herald added a project: All. jchlanda requested review of this revision. Herald added subscribers: llvm-commits, cfe-commits, jholewinski. Herald added projects: clang, LLVM. Also check whether native half types are supported in order to give a more descriptive error message; without this check, clang only reports an incorrect intrinsic return type. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D145238 Files: clang/include/clang/Basic/BuiltinsNVPTX.def clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/builtins-nvptx-native-half-type-err.c clang/test/CodeGen/builtins-nvptx-native-half-type.c clang/test/CodeGen/builtins-nvptx.c llvm/test/CodeGen/NVPTX/ldu-ldg.ll Index: llvm/test/CodeGen/NVPTX/ldu-ldg.ll === --- llvm/test/CodeGen/NVPTX/ldu-ldg.ll +++ llvm/test/CodeGen/NVPTX/ldu-ldg.ll @@ -3,7 +3,13 @@ declare i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align) +declare i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align) declare i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align) +declare i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 %align) +declare float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 %align) +declare double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 %align) +declare half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align) +declare <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align) declare i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align) declare i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align) @@ -21,6 +27,13 @@ ret i8 %val } +; CHECK: test_ldu_i16 +define i16 @test_ldu_i16(ptr addrspace(1) %ptr) { +; ldu.global.u16 + %val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 4) + ret i16 %val +} + ; CHECK: test_ldu_i32 define i32 @test_ldu_i32(ptr 
addrspace(1) %ptr) { ; ldu.global.u32 @@ -28,6 +41,41 @@ ret i32 %val } +; CHECK: test_ldu_i64 +define i64 @test_ldu_i64(ptr addrspace(1) %ptr) { +; ldu.global.u64 + %val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8) + ret i64 %val +} + +; CHECK: test_ldu_f32 +define float @test_ldu_f32(ptr addrspace(1) %ptr) { +; ldu.global.u64 + %val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4) + ret float %val +} + +; CHECK: test_ldu_f64 +define double @test_ldu_f64(ptr addrspace(1) %ptr) { +; ldu.global.u64 + %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8) + ret double %val +} + +; CHECK: test_ldu_f16 +define half @test_ldu_f16(ptr addrspace(1) %ptr) { +; ldu.global.b16 + %val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 4) + ret half %val +} + +; CHECK: test_ldu_v2f16 +define <2 x half> @test_ldu_v2f16(ptr addrspace(1) %ptr) { +; ldu.global.b32 + %val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4) + ret <2 x half> %val +} + ; CHECK: test_ldg_i8 define i8 @test_ldg_i8(ptr addrspace(1) %ptr) { ; ld.global.nc.u8 Index: clang/test/CodeGen/builtins-nvptx.c === --- clang/test/CodeGen/builtins-nvptx.c +++ clang/test/CodeGen/builtins-nvptx.c @@ -652,6 +652,97 @@ __nvvm_ldg_d2((const double2 *)p); } +// CHECK-LABEL: nvvm_ldu +__device__ void nvvm_ldu(const void *p) { + // CHECK: call i8 @llvm.nvvm.ldu.global.i.i8.p0(ptr {{%[0-9]+}}, i32 1) + // CHECK: call i8 @llvm.nvvm.ldu.global.i.i8.p0(ptr {{%[0-9]+}}, i32 1) + __nvvm_ldu_c((const char *)p); + __nvvm_ldu_uc((const unsigned char *)p); + + // CHECK: call i16 @llvm.nvvm.ldu.global.i.i16.p0(ptr {{%[0-9]+}}, i32 2) + // CHECK: call i16 @llvm.nvvm.ldu.global.i.i16.p0(ptr {{%[0-9]+}}, i32 2) + __nvvm_ldu_s((const short *)p); + __nvvm_ldu_us((const unsigned short *)p); + + // CHECK: call i32 @llvm.nvvm.ldu.global.i.i32.p0(ptr {{%[0-9]+}}, i32 4) + // CHECK: call i32 
@llvm.nvvm.ldu.global.i.i32.p0(ptr {{%[0-9]+}}, i32 4) + __nvvm_ldu_i((const int *)p); + __nvvm_ldu_ui((const unsigned int *)p); + + // LP32: call i32 @llvm.nvvm.ldu.global.i.i32.p0(ptr {{%[0-9]+}}, i32 4) + // LP32: call i32 @llvm.nvvm.ldu.global.i.i32.p0(ptr {{%[0-9]+}}, i32 4) + // LP64: call i64 @llvm.nvvm.ldu.global.i.i64.p0(ptr {{%[0-9]+}}, i32 8) + // LP64: call i64 @llvm.nvvm.ldu.global.i.i64.p0(ptr {{%[0-9]+}}, i32 8) + __nvvm_ldu_l((const long *)p); + __nvvm_ldu_ul((const unsigned long *)p); + + // CHECK: call float @llvm.nvvm.ldu.global.f.f32.p0(ptr {{%[0-9]+}}, i32 4) + __nvvm_ldu_f((const float *)p); + // CHECK: call double @llvm.nvvm.ldu.global.f.f64.p0(ptr {{%[0-9]+}}, i32 8) + __nvvm_ldu_d((const double *)p); + + // CHECK: call <2 x i8>