[PATCH] D81670: [TTI] Expose isNoopAddrSpaceCast from TLI.[SROA] Teach SROA to recognize no-op addrspacecast.

2020-06-11 Thread Matt Arsenault via Phabricator via cfe-commits
arsenm added a comment.

In D81670#2088304 , @hliao wrote:

> In D81670#2087974 , @arsenm wrote:
>
> > We should instead allow bitcast to perform no-op addrspacecasts
>
>
> That may be a little bit challenging as so far no-op `addrspacecast` is 
> target-specific. There may be no TTI available when `bitcast` is constructed.


Yes, that is the point. You don't need TTI to use it. An address-space bitcast
would be just a dumb bit reinterpretation, not something that may change the
value like addrspacecast.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D81670/new/

https://reviews.llvm.org/D81670



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D81670: [TTI] Expose isNoopAddrSpaceCast from TLI.[SROA] Teach SROA to recognize no-op addrspacecast.

2020-06-11 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

In D81670#2087974 , @arsenm wrote:

> We should instead allow bitcast to perform no-op addrspacecasts


That may be a little bit challenging as so far no-op `addrspacecast` is 
target-specific. There may be no TTI available when `bitcast` is constructed.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D81670/new/

https://reviews.llvm.org/D81670



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D81670: [TTI] Expose isNoopAddrSpaceCast from TLI.[SROA] Teach SROA to recognize no-op addrspacecast.

2020-06-11 Thread Matt Arsenault via Phabricator via cfe-commits
arsenm added a comment.

We should instead allow bitcast to perform no-op addrspacecasts


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D81670/new/

https://reviews.llvm.org/D81670



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D81670: [TTI] Expose isNoopAddrSpaceCast from TLI.[SROA] Teach SROA to recognize no-op addrspacecast.

2020-06-11 Thread Michael Liao via Phabricator via cfe-commits
hliao updated this revision to Diff 270177.
hliao added a comment.

Revise the formatting.

Updating D81670: [TTI] Expose isNoopAddrSpaceCast from TLI.
===

[SROA] Teach SROA to recognize no-op addrspacecast.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D81670/new/

https://reviews.llvm.org/D81670

Files:
  clang/test/CodeGen/thinlto-distributed-newpm.ll
  clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
  llvm/include/llvm/Analysis/TargetTransformInfo.h
  llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
  llvm/include/llvm/CodeGen/BasicTTIImpl.h
  llvm/include/llvm/Transforms/Scalar/SROA.h
  llvm/lib/Analysis/TargetTransformInfo.cpp
  llvm/lib/Transforms/Scalar/SROA.cpp
  llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
  llvm/test/Transforms/SROA/noop-addrspacecast.ll

Index: llvm/test/Transforms/SROA/noop-addrspacecast.ll
===
--- /dev/null
+++ llvm/test/Transforms/SROA/noop-addrspacecast.ll
@@ -0,0 +1,19 @@
+; RUN: opt -S -o - -sroa %s | FileCheck %s
+; RUN: opt -S -o - -passes=sroa %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7"
+target triple = "amdgcn-amd-amdhsa"
+
+; CHECK-LABEL: @noop_addrspacecast(
+; CHECK-NEXT: = addrspacecast i32 addrspace(1)* %{{.*}} to i32*
+; CHECK-NEXT: store i32 0, i32* %{{.*}}
+; CHECK-NEXT: ret void
+define void @noop_addrspacecast(i32 addrspace(1)* %x.coerce) {
+  %x = alloca i32*, align 8, addrspace(5)
+  %x1 = addrspacecast i32* addrspace(5)* %x to i32**
+  %x2 = bitcast i32** %x1 to i32 addrspace(1)**
+  store i32 addrspace(1)* %x.coerce, i32 addrspace(1)** %x2
+  %x3 = load i32*, i32** %x1
+  store i32 0, i32* %x3
+  ret void
+}
Index: llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
===
--- llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
+++ llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
@@ -123,15 +123,15 @@
 ; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}>
 ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run.
 ; CHECK-O-NEXT: Running pass: SROA
-; These next two can appear in any order since they are accessed as parameters
+; These next three can appear in any order since they are accessed as parameters
 ; on the same call to SROA::runImpl
+; CHECK-O1-DAG: Running analysis: TargetIRAnalysis on foo
+; CHECK-O2-DAG: Running analysis: TargetIRAnalysis on foo
+; CHECK-Os-DAG: Running analysis: TargetIRAnalysis on foo
+; CHECK-Oz-DAG: Running analysis: TargetIRAnalysis on foo
 ; CHECK-O-DAG: Running analysis: DominatorTreeAnalysis on foo
 ; CHECK-O-DAG: Running analysis: AssumptionAnalysis on foo
 ; CHECK-O-NEXT: Running pass: EarlyCSEPass
-; CHECK-O1-NEXT: Running analysis: TargetIRAnalysis on foo
-; CHECK-O2-NEXT: Running analysis: TargetIRAnalysis on foo
-; CHECK-Os-NEXT: Running analysis: TargetIRAnalysis on foo
-; CHECK-Oz-NEXT: Running analysis: TargetIRAnalysis on foo
 ; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis
 ; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass
 ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
Index: llvm/lib/Transforms/Scalar/SROA.cpp
===
--- llvm/lib/Transforms/Scalar/SROA.cpp
+++ llvm/lib/Transforms/Scalar/SROA.cpp
@@ -41,6 +41,7 @@
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/PtrUseVisitor.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
@@ -1677,7 +1678,9 @@
 /// ensure that we only try to convert viable values. The strategy is that we
 /// will peel off single element struct and array wrappings to get to an
 /// underlying value, and convert that value.
-static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
+static bool canConvertValue(const DataLayout &DL,
+const TargetTransformInfo &TTI, Type *OldTy,
+Type *NewTy) {
   if (OldTy == NewTy)
 return true;
 
@@ -1703,8 +1706,11 @@
   NewTy = NewTy->getScalarType();
   if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
 if (NewTy->isPointerTy() && OldTy->isPointerTy()) {
-  return cast<PointerType>(NewTy)->getPointerAddressSpace() ==
-cast<PointerType>(OldTy)->getPointerAddressSpace();
+  // Pointers are convertible if they have the same address space or that
+  // address space casting is a no-op.
+  unsigned OldAS = cast<PointerType>(OldTy)->getPointerAddressSpace();
+  unsigned NewAS = cast<PointerType>(NewTy)->getPointerAddressSpace();
+  return OldAS == NewAS || TTI.isNoopAddrSpaceCast(OldAS, NewAS);
 }
 
 // 

[PATCH] D81670: [TTI] Expose isNoopAddrSpaceCast from TLI.[SROA] Teach SROA to recognize no-op addrspacecast.

2020-06-11 Thread Michael Liao via Phabricator via cfe-commits
hliao created this revision.
hliao added reviewers: arsenm, chandlerc.
Herald added subscribers: llvm-commits, cfe-commits, kerbowa, dexonsmith, 
steven_wu, hiraditya, nhaehnle, wdng, jvesely.
Herald added projects: clang, LLVM.

So far, SROA could only handle convertible pointer pairs if they are in the
same address space. Just like a no-op cast, a no-op `addrspacecast` changes
no bits, so it can also be used to convert pointer pairs from different address
spaces. That helps the `infer-address-spaces` pass propagate address spaces.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D81670

Files:
  clang/test/CodeGen/thinlto-distributed-newpm.ll
  clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
  llvm/include/llvm/Analysis/TargetTransformInfo.h
  llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
  llvm/include/llvm/CodeGen/BasicTTIImpl.h
  llvm/include/llvm/Transforms/Scalar/SROA.h
  llvm/lib/Analysis/TargetTransformInfo.cpp
  llvm/lib/Transforms/Scalar/SROA.cpp
  llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
  llvm/test/Transforms/SROA/noop-addrspacecast.ll

Index: llvm/test/Transforms/SROA/noop-addrspacecast.ll
===
--- /dev/null
+++ llvm/test/Transforms/SROA/noop-addrspacecast.ll
@@ -0,0 +1,19 @@
+; RUN: opt -S -o - -sroa %s | FileCheck %s
+; RUN: opt -S -o - -passes=sroa %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7"
+target triple = "amdgcn-amd-amdhsa"
+
+; CHECK-LABEL: @noop_addrspacecast(
+; CHECK-NEXT: = addrspacecast i32 addrspace(1)* %{{.*}} to i32*
+; CHECK-NEXT: store i32 0, i32* %{{.*}}
+; CHECK-NEXT: ret void
+define void @noop_addrspacecast(i32 addrspace(1)* %x.coerce) {
+  %x = alloca i32*, align 8, addrspace(5)
+  %x1 = addrspacecast i32* addrspace(5)* %x to i32**
+  %x2 = bitcast i32** %x1 to i32 addrspace(1)**
+  store i32 addrspace(1)* %x.coerce, i32 addrspace(1)** %x2
+  %x3 = load i32*, i32** %x1
+  store i32 0, i32* %x3
+  ret void
+}
Index: llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
===
--- llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
+++ llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
@@ -123,15 +123,15 @@
 ; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}>
 ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run.
 ; CHECK-O-NEXT: Running pass: SROA
-; These next two can appear in any order since they are accessed as parameters
+; These next three can appear in any order since they are accessed as parameters
 ; on the same call to SROA::runImpl
+; CHECK-O1-DAG: Running analysis: TargetIRAnalysis on foo
+; CHECK-O2-DAG: Running analysis: TargetIRAnalysis on foo
+; CHECK-Os-DAG: Running analysis: TargetIRAnalysis on foo
+; CHECK-Oz-DAG: Running analysis: TargetIRAnalysis on foo
 ; CHECK-O-DAG: Running analysis: DominatorTreeAnalysis on foo
 ; CHECK-O-DAG: Running analysis: AssumptionAnalysis on foo
 ; CHECK-O-NEXT: Running pass: EarlyCSEPass
-; CHECK-O1-NEXT: Running analysis: TargetIRAnalysis on foo
-; CHECK-O2-NEXT: Running analysis: TargetIRAnalysis on foo
-; CHECK-Os-NEXT: Running analysis: TargetIRAnalysis on foo
-; CHECK-Oz-NEXT: Running analysis: TargetIRAnalysis on foo
 ; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis
 ; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass
 ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
Index: llvm/lib/Transforms/Scalar/SROA.cpp
===
--- llvm/lib/Transforms/Scalar/SROA.cpp
+++ llvm/lib/Transforms/Scalar/SROA.cpp
@@ -41,6 +41,7 @@
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/PtrUseVisitor.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
@@ -1677,7 +1678,9 @@
 /// ensure that we only try to convert viable values. The strategy is that we
 /// will peel off single element struct and array wrappings to get to an
 /// underlying value, and convert that value.
-static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
+static bool canConvertValue(const DataLayout &DL,
+const TargetTransformInfo &TTI, Type *OldTy,
+Type *NewTy) {
   if (OldTy == NewTy)
 return true;
 
@@ -1703,8 +1706,11 @@
   NewTy = NewTy->getScalarType();
   if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
 if (NewTy->isPointerTy() && OldTy->isPointerTy()) {
-  return cast<PointerType>(NewTy)->getPointerAddressSpace() ==
-cast<PointerType>(OldTy)->getPointerAddressSpace();
+  // Pointers are convertible if they have the same address space or that
+  // address space casting is a no-op.
+