[PATCH] D152275: Use memory region declaration intrinsic when generating code for array subscripts

2023-08-07 Thread Simeon Krastnikov via Phabricator via cfe-commits
simeon updated this revision to Diff 547763.
simeon added a comment.
Herald added subscribers: llvm-commits, kmitropoulou, ChuanqiXu, pengfei, 
asbirlea, haicheng, hiraditya, jvesely.
Herald added a project: LLVM.

The patch now includes the changes that need to be made to the optimization 
passes we observed to be most negatively affected by the introduction of the 
intrinsic, primarily InstCombine and SROA. However, it is not comprehensive: we 
also observed missed optimization opportunities in other passes such as 
GlobalOpt and MergedLoadStoreMotionPass.

A large number of hand-written unit tests need to be updated by hand but as 
they are very sensitive to the smallest change in the definition of the 
intrinsic, I am postponing this until we have some initial approval.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152275/new/

https://reviews.llvm.org/D152275

Files:
  clang/lib/CodeGen/CGExpr.cpp
  clang/test/CodeGen/2005-01-02-ConstantInits.c
  clang/test/CodeGen/X86/va-arg-sse.c
  clang/test/CodeGen/builtin-align-array.c
  clang/test/CodeGenCXX/amdgcn-automatic-variable.cpp
  llvm/include/llvm/Analysis/PtrUseVisitor.h
  llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
  llvm/include/llvm/IR/InstVisitor.h
  llvm/include/llvm/IR/IntrinsicInst.h
  llvm/include/llvm/IR/Intrinsics.td
  llvm/lib/Analysis/AliasSetTracker.cpp
  llvm/lib/Analysis/BasicAliasAnalysis.cpp
  llvm/lib/Analysis/ConstantFolding.cpp
  llvm/lib/Analysis/InlineCost.cpp
  llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
  llvm/lib/Analysis/MemoryLocation.cpp
  llvm/lib/Analysis/MemorySSA.cpp
  llvm/lib/Analysis/ObjCARCInstKind.cpp
  llvm/lib/Analysis/ValueTracking.cpp
  llvm/lib/CodeGen/CodeGenPrepare.cpp
  llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
  llvm/lib/IR/Value.cpp
  llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
  llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
  llvm/lib/Transforms/Scalar/SROA.cpp
  llvm/lib/Transforms/Utils/Local.cpp
  llvm/test/Transforms/InstCombine/gep-mem-reg-decl.ll
  llvm/test/Transforms/SROA/mem-reg-decl.ll

Index: llvm/test/Transforms/SROA/mem-reg-decl.ll
===
--- /dev/null
+++ llvm/test/Transforms/SROA/mem-reg-decl.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes='sroa' -S | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG
+; RUN: opt < %s -passes='sroa' -S | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG
+
+declare ptr @llvm.memory.region.decl.p0(ptr readnone, i64, i64)
+
+; ensure that SROA can "see through" the intrinsic call
+define i32 @test1() {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:ret i32 1
+;
+entry:
+  %s = alloca { i32 }, align 8
+  store i32 1, ptr %s, align 8
+  %arrayidx.bounded = call ptr @llvm.memory.region.decl.p0(ptr %s, i64 0, i64 8)
+  %0 = load i32, ptr %arrayidx.bounded, align 8
+  ret i32 %0
+}
+
+; variation of the above test
+define i32 @test2() {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:[[ADD:%.*]] = add nsw i32 undef, undef
+; CHECK-NEXT:ret i32 [[ADD]]
+;
+entry:
+  %s = alloca [1024 x i32], align 4
+  %t = alloca [1024 x i32], align 4
+  %arrayidx.bounded = call ptr @llvm.memory.region.decl.p0(ptr %s, i64 0, i64 4096)
+  %0 = load i32, ptr %arrayidx.bounded, align 4
+  %arrayidx.bounded1 = call ptr @llvm.memory.region.decl.p0(ptr %t, i64 0, i64 4096)
+  %arrayidx2 = getelementptr inbounds i32, ptr %arrayidx.bounded1, i32 1
+  %1 = load i32, ptr %arrayidx2, align 4
+  %add = add nsw i32 %0, %1
+  ret i32 %add
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-MODIFY-CFG: {{.*}}
+; CHECK-PRESERVE-CFG: {{.*}}
Index: llvm/test/Transforms/InstCombine/gep-mem-reg-decl.ll
===
--- /dev/null
+++ llvm/test/Transforms/InstCombine/gep-mem-reg-decl.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S < %s -passes=instcombine | FileCheck %s
+
+%struct.S = type { [1024 x i32], [1024 x i32] }
+
+declare ptr @llvm.memory.region.decl.p0(ptr readnone, i64, i64)
+
+; test that a GEP of a GEP can be combined in the presence of
+; intermediate intrinsic calls
+define i32 @test_gep_of_gep(ptr noundef %s, i64 %i) {
+; CHECK-LABEL: @test_gep_of_gep(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:[[ARRAYIDX21:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[S:%.*]], i64 0, i32 1, i64 [[I:%.*]]
+; CHECK-NEXT:[[ARRAYIDX_BOUNDED:%.*]] = call ptr @llvm.memory.region.decl.p0(ptr nonnull [[ARRAYIDX21]], i64 0, i64 4096)
+; CHECK-NEXT:[[TMP0:%.*]] = load i32, ptr [[ARRAYIDX_BOUNDED]], align 4
+; CHECK-NEXT:ret i32 [[TMP0]]
+;
+entry:
+  %arrayidx.bounded = call ptr @llvm.memory.region.decl.p0(ptr %s, i64 0, i64 4096)
+  %B = getelementptr inbounds %struct.S, ptr %s, i32 0, i32 1
+  %arr

[PATCH] D152275: Use memory region declaration intrinsic when generating code for array subscripts

2023-06-15 Thread Simeon Krastnikov via Phabricator via cfe-commits
simeon added a comment.

> - The compile-time overhead of creating a bunch of extra intrinsics might be 
> significant.  Maybe we can mitigate to some extent by avoiding emitting the 
> intrinsic in simple cases where it doesn't actually help (constant indexes?).

Yes, that would make sense.

> - User code might not actually obey the language rules; do we have any 
> sanitizer that checks if user code trips over this?

I believe AddressSanitizer  
should be able to detect out-of-bounds accesses.

> - Not sure how this interacts with full restrict and related proposals.

So far as alias analysis goes, the two approaches should be orthogonal, but 
nevertheless we'd still have to go through the usual procedure of modifying the 
direct `GetElementPtrInst` checks and casts used by such patches.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D152275/new/

https://reviews.llvm.org/D152275

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D152275: Use memory region declaration intrinsic when generating code for array subscripts

2023-06-06 Thread Simeon Krastnikov via Phabricator via cfe-commits
simeon created this revision.
simeon added reviewers: efriedma, lebedev.ri, arichardson.
Herald added a subscriber: StephenFan.
Herald added a project: All.
simeon requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

As an alternative to https://reviews.llvm.org/D150192, we consider using the 
memory region declaration intrinsic, introduced in 
https://reviews.llvm.org/D115274, as another way to propagate the semantics of 
C/C++ array-indexing rules to LLVM IR.

This patch is only only a proof-of-concept at this point, intended to gather 
feedback and bring https://reviews.llvm.org/D115274 back into focus. As was 
mentioned there (and as seen from test results), completing this patch would 
require patching a lot of optimizations. For example, 
`InstructionCombiningPass` works with `GetElementPtrInst`s directly and will 
not recognize an intrinsic call as such. Many other passes such as 
`IndVarSimplifyPass`, `EarlyCSE`, etc., routinely use a switch case or call 
`isa()` to detect `GEP`s.

We'd be fine with doing that sort of work, if we get some initial thumbs up 
from the community that this is the way to go. Of course, we're also open to 
trying a different approach if it avoids the apparently unavoidable complexity 
of this one.

Depends on D115274 


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D152275

Files:
  clang/lib/CodeGen/CGExpr.cpp


Index: clang/lib/CodeGen/CGExpr.cpp
===
--- clang/lib/CodeGen/CGExpr.cpp
+++ clang/lib/CodeGen/CGExpr.cpp
@@ -3936,10 +3936,44 @@
 
 // Propagate the alignment from the array itself to the result.
 QualType arrayType = Array->getType();
-Addr = emitArraySubscriptGEP(
-*this, ArrayLV.getAddress(*this), {CGM.getSize(CharUnits::Zero()), 
Idx},
-E->getType(), !getLangOpts().isSignedOverflowDefined(), SignedIndices,
-E->getExprLoc(), &arrayType, E->getBase());
+
+Address ArrayLVAddr = ArrayLV.getAddress(*this);
+
+if (!getLangOpts().isSignedOverflowDefined() && 
+// ISO/IEC 9899:TC3, 6.5.6.8
+(getLangOpts().C99 || getLangOpts().CPlusPlus) &&
+getContext().getAsConstantArrayType(arrayType)) {
+  auto *CAT = getContext().getAsConstantArrayType(arrayType);
+  uint64_t BoundedRegionSize = (Idx->getType()->getScalarSizeInBits() / 8) 
* 
+  CAT->getSize().getZExtValue();
+
+  Address BeginOff = emitArraySubscriptGEP(
+  *this, ArrayLVAddr, 
+  {CGM.getSize(CharUnits::Zero()), CGM.getSize(CharUnits::Zero())},
+  E->getType(), !getLangOpts().isSignedOverflowDefined(), 
SignedIndices,
+  E->getExprLoc(), &arrayType, E->getBase());
+
+  llvm::Function *F = 
CGM.getIntrinsic(llvm::Intrinsic::memory_region_decl, 
+  BeginOff.getPointer()->getType());
+  llvm::Value *Call = Builder.CreateCall(F,
+  {BeginOff.getPointer(), 
+  llvm::ConstantInt::get(Int64Ty, 0), 
+  llvm::ConstantInt::get(Int64Ty, BoundedRegionSize)},
+  "arrayidx.bounded");
+  Address RetAddr(Call, BeginOff.getElementType(), 
+  ArrayLVAddr.getAlignment());
+
+  Addr = emitArraySubscriptGEP(
+  *this, RetAddr, {Idx},
+  E->getType(), !getLangOpts().isSignedOverflowDefined(), 
SignedIndices,
+  E->getExprLoc(), &arrayType, E->getBase());
+} else {
+  Addr = emitArraySubscriptGEP(
+  *this, ArrayLVAddr, {CGM.getSize(CharUnits::Zero()), Idx},
+  E->getType(), !getLangOpts().isSignedOverflowDefined(), 
SignedIndices,
+  E->getExprLoc(), &arrayType, E->getBase());
+}
+
 EltBaseInfo = ArrayLV.getBaseInfo();
 EltTBAAInfo = CGM.getTBAAInfoForSubobject(ArrayLV, E->getType());
   } else {


Index: clang/lib/CodeGen/CGExpr.cpp
===
--- clang/lib/CodeGen/CGExpr.cpp
+++ clang/lib/CodeGen/CGExpr.cpp
@@ -3936,10 +3936,44 @@
 
 // Propagate the alignment from the array itself to the result.
 QualType arrayType = Array->getType();
-Addr = emitArraySubscriptGEP(
-*this, ArrayLV.getAddress(*this), {CGM.getSize(CharUnits::Zero()), Idx},
-E->getType(), !getLangOpts().isSignedOverflowDefined(), SignedIndices,
-E->getExprLoc(), &arrayType, E->getBase());
+
+Address ArrayLVAddr = ArrayLV.getAddress(*this);
+
+if (!getLangOpts().isSignedOverflowDefined() && 
+// ISO/IEC 9899:TC3, 6.5.6.8
+(getLangOpts().C99 || getLangOpts().CPlusPlus) &&
+getContext().getAsConstantArrayType(arrayType)) {
+  auto *CAT = getContext().getAsConstantArrayType(arrayType);
+  uint64_t BoundedRegionSize = (Idx->getType()->getScalarSizeInBits() / 8) * 
+  CAT->getSize().getZExtValue();
+
+  Address BeginOff = emitArraySubscriptGEP(
+  *this, ArrayLVAddr, 
+  {CGM.getSize(CharUn

[PATCH] D150192: Allow clang to emit inrange metadata when generating code for array subscripts

2023-05-09 Thread Simeon Krastnikov via Phabricator via cfe-commits
simeon-imgtec created this revision.
simeon-imgtec added reviewers: pcc, eli.friedman.
simeon-imgtec created this object with visibility "All Users".
Herald added subscribers: mattd, asavonic, jdoerfert, pengfei, hiraditya, 
jvesely.
Herald added a project: All.
simeon-imgtec requested review of this revision.
Herald added a reviewer: jdoerfert.
Herald added subscribers: llvm-commits, cfe-commits, jplehr, sstefan1.
Herald added projects: clang, LLVM.

As out-of-bounds array accesses are undefined in C/C++ 
, we can capture 
this restriction using the newly introduced inrange attribute on individual GEP 
indices. The underlying motivation is that it would enable less conservative 
inferences in (basic) alias analysis, in turn improving passes such as GVN and 
LICM.

As the inrange keyword is currently only supported on constant GEPs, we extend 
the support to GEP instructions, and mofiy clang to emit the inrange flag in an 
array subscript scenario where it is safe to do so.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D150192

Files:
  clang/lib/CodeGen/CGExpr.cpp
  clang/test/CodeGen/2005-01-02-ConstantInits.c
  clang/test/CodeGen/2010-07-14-ref-off-end.c
  clang/test/CodeGen/X86/va-arg-sse.c
  clang/test/CodeGen/builtin-align-array.c
  clang/test/CodeGen/exprs.c
  clang/test/CodeGen/matrix-type-operators.c
  clang/test/CodeGen/ubsan-pointer-overflow.c
  clang/test/CodeGen/unaligned-expr.c
  clang/test/CodeGen/union-tbaa1.c
  clang/test/CodeGen/vla.c
  clang/test/CodeGenCXX/amdgcn-automatic-variable.cpp
  clang/test/CodeGenCXX/compound-literals.cpp
  clang/test/CodeGenCXX/cxx1z-decomposition.cpp
  clang/test/CodeGenCXX/lambda-expressions.cpp
  clang/test/CodeGenCXX/no-odr-use.cpp
  clang/test/CodeGenCXX/pr45964-decomp-transform.cpp
  clang/test/CodeGenObjC/arc.m
  clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
  clang/test/OpenMP/align_clause_codegen.cpp
  clang/test/OpenMP/declare_mapper_codegen.cpp
  clang/test/OpenMP/distribute_firstprivate_codegen.cpp
  clang/test/OpenMP/distribute_lastprivate_codegen.cpp
  clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp
  clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp
  clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp
  clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp
  clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp
  clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp
  clang/test/OpenMP/distribute_private_codegen.cpp
  clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp
  clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp
  clang/test/OpenMP/distribute_simd_private_codegen.cpp
  clang/test/OpenMP/for_firstprivate_codegen.cpp
  clang/test/OpenMP/for_lastprivate_codegen.cpp
  clang/test/OpenMP/for_private_codegen.cpp
  clang/test/OpenMP/for_reduction_codegen.cpp
  clang/test/OpenMP/for_reduction_codegen_UDR.cpp
  clang/test/OpenMP/for_scan_codegen.cpp
  clang/test/OpenMP/for_simd_scan_codegen.cpp
  clang/test/OpenMP/irbuilder_simd_aligned.cpp
  clang/test/OpenMP/master_taskloop_reduction_codegen.cpp
  clang/test/OpenMP/master_taskloop_simd_reduction_codegen.cpp
  clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
  clang/test/OpenMP/nvptx_parallel_codegen.cpp
  clang/test/OpenMP/nvptx_parallel_for_codegen.cpp
  clang/test/OpenMP/nvptx_target_codegen.cpp
  clang/test/OpenMP/nvptx_target_parallel_codegen.cpp
  clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp
  clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
  clang/test/OpenMP/nvptx_target_simd_codegen.cpp
  clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
  clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
  clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp
  clang/test/OpenMP/ordered_codegen.cpp
  clang/test/OpenMP/parallel_copyin_codegen.cpp
  clang/test/OpenMP/parallel_firstprivate_codegen.cpp
  clang/test/OpenMP/parallel_for_scan_codegen.cpp
  clang/test/OpenMP/parallel_for_simd_scan_codegen.cpp
  clang/test/OpenMP/parallel_master_taskloop_firstprivate_codegen.cpp
  clang/test/OpenMP/parallel_master_taskloop_reduction_codegen.cpp
  clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_codegen.cpp
  clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp
  clang/test/OpenMP/parallel_master_taskloop_simd_reduction_codegen.cpp
  clang/test/OpenMP/parallel_private_codegen.cpp
  clang/test/OpenMP/parallel_reduction_codegen.cpp
  clang/test/OpenMP/reduction_implicit_map.cpp
  clang/test/OpenMP/sections_firstprivate_codegen.cpp
  clang/test/OpenMP/sections_lastprivate_codegen.cpp
  clang/test/OpenMP/sections_private_codegen.cpp
  clang/test/OpenMP/sections_reduction_codegen.cpp
  clang/test/OpenMP/single_firstprivate_codegen.cpp
  clang/test/OpenMP/single_private_codegen.cpp