tejohnson updated this revision to Diff 517678.
tejohnson added a comment.

Expand command and patch description for mechanism used by distributed ThinLTO


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D149215/new/

https://reviews.llvm.org/D149215

Files:
  clang/test/CodeGen/thinlto-distributed-supports-hot-cold-new.ll
  llvm/include/llvm/IR/ModuleSummaryIndex.h
  llvm/include/llvm/LTO/LTO.h
  llvm/lib/Bitcode/Reader/BitcodeReader.cpp
  llvm/lib/IR/ModuleSummaryIndex.cpp
  llvm/lib/LTO/LTO.cpp
  llvm/lib/LTO/LTOBackend.cpp
  llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
  llvm/test/LTO/X86/memprof-supports-hot-cold-new.ll
  llvm/test/ThinLTO/X86/memprof-basic.ll
  llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll
  llvm/test/ThinLTO/X86/memprof-duplicate-context-ids2.ll
  llvm/test/ThinLTO/X86/memprof-funcassigncloning.ll
  llvm/test/ThinLTO/X86/memprof-indirectcall.ll
  llvm/test/ThinLTO/X86/memprof-inlined.ll
  llvm/test/ThinLTO/X86/memprof-inlined2.ll
  llvm/test/ThinLTO/X86/memprof-supports-hot-cold-new.ll
  llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
  llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
  llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll
  llvm/test/Transforms/MemProfContextDisambiguation/funcassigncloning.ll
  llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll
  llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll
  llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll

Index: llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll
===================================================================
--- llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll
+++ llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll
@@ -42,7 +42,7 @@
 ;;
 ;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
 
-; RUN: opt -passes=memprof-context-disambiguation \
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
 ; RUN:	-memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
 ; RUN:	%s -S 2>&1 | FileCheck %s --check-prefix=DUMP
 
Index: llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll
===================================================================
--- llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll
+++ llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll
@@ -41,7 +41,7 @@
 ;;
 ;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
 
-; RUN: opt -passes=memprof-context-disambiguation \
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
 ; RUN:	-memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
 ; RUN:	-memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
 ; RUN:  -stats -pass-remarks=memprof-context-disambiguation \
Index: llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll
===================================================================
--- llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll
+++ llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll
@@ -51,7 +51,7 @@
 ;;
 ;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
 
-; RUN: opt -passes=memprof-context-disambiguation \
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
 ; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
 ; RUN:  -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
 ; RUN:  -stats -pass-remarks=memprof-context-disambiguation \
Index: llvm/test/Transforms/MemProfContextDisambiguation/funcassigncloning.ll
===================================================================
--- llvm/test/Transforms/MemProfContextDisambiguation/funcassigncloning.ll
+++ llvm/test/Transforms/MemProfContextDisambiguation/funcassigncloning.ll
@@ -45,7 +45,7 @@
 ;;
 ;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
 
-; RUN: opt -passes=memprof-context-disambiguation \
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
 ; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
 ; RUN:  -stats -pass-remarks=memprof-context-disambiguation \
 ; RUN:  %s -S 2>&1 | FileCheck %s --check-prefix=DUMP --check-prefix=IR \
Index: llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll
===================================================================
--- llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll
+++ llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll
@@ -93,7 +93,7 @@
 ;;
 ;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
 
-; RUN: opt -passes=memprof-context-disambiguation \
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
 ; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
 ; RUN:  -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
 ; RUN:  %s -S 2>&1 | FileCheck %s --check-prefix=DUMP
Index: llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
===================================================================
--- llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
+++ llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
@@ -53,7 +53,7 @@
 ;;
 ;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
 
-; RUN: opt -passes=memprof-context-disambiguation \
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
 ; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
 ; RUN:  -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
 ; RUN:  -stats -pass-remarks=memprof-context-disambiguation \
Index: llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
===================================================================
--- llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
+++ llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
@@ -31,7 +31,7 @@
 ;;
 ;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
 
-; RUN: opt -passes=memprof-context-disambiguation \
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
 ; RUN:	-memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
 ; RUN:	-memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
 ; RUN:	-stats -pass-remarks=memprof-context-disambiguation \
@@ -42,6 +42,15 @@
 ;; We should have cloned bar, baz, and foo, for the cold memory allocation.
 ; RUN:	cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
 
+;; Check again without -supports-hot-cold-new and ensure all MIB are cold and
+;; that there is no cloning.
+; RUN: opt -passes=memprof-context-disambiguation \
+; RUN:	-memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
+; RUN:	-memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
+; RUN:	-stats -pass-remarks=memprof-context-disambiguation \
+; RUN:	%s -S 2>&1 | FileCheck %s --implicit-check-not="Callsite Context Graph" \
+; RUN:	--implicit-check-not="created clone"
+
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
Index: llvm/test/ThinLTO/X86/memprof-supports-hot-cold-new.ll
===================================================================
--- /dev/null
+++ llvm/test/ThinLTO/X86/memprof-supports-hot-cold-new.ll
@@ -0,0 +1,57 @@
+;; Test that passing -supports-hot-cold-new to the LTO link allows context
+;; disambiguation to proceed, and also prevents memprof metadata and attributes
+;; from being stripped in the LTO backend, and vice versa without passing
+;; -supports-hot-cold-new.
+
+;; First check with -supports-hot-cold-new.
+; RUN: opt -thinlto-bc %s >%t.o
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN:	-supports-hot-cold-new \
+; RUN:	-r=%t.o,main,plx \
+; RUN:	-r=%t.o,_Znam, \
+; RUN:	-memprof-dump-ccg \
+; RUN:	 -save-temps \
+; RUN:	-o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP
+; DUMP: Callsite Context Graph:
+
+; RUN: llvm-dis %t.out.1.0.preopt.bc -o - | FileCheck %s --check-prefix=IR
+; IR: !memprof {{.*}} !callsite
+; IR: "memprof"="cold"
+
+;; Next check without -supports-hot-cold-new, we should not perform
+;; context disambiguation, and we should strip memprof metadata and
+;; attributes before optimization.
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN:	-r=%t.o,main,plx \
+; RUN:	-r=%t.o,_Znam, \
+; RUN:	-memprof-dump-ccg \
+; RUN:	 -save-temps \
+; RUN:	-o %t.out 2>&1 | FileCheck %s --allow-empty \
+; RUN:  --implicit-check-not "Callsite Context Graph:"
+
+; RUN: llvm-dis %t.out.1.0.preopt.bc -o - | FileCheck %s \
+; RUN: --implicit-check-not "!memprof" --implicit-check-not "!callsite" \
+; RUN: --implicit-check-not '"memprof"="cold"'
+
+source_filename = "memprof-supports-hot-cold-new.ll"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @main() #0 {
+entry:
+  %call = call ptr @_Znam(i64 0), !memprof !0, !callsite !5
+  %call2 = call ptr @_Znam(i64 0) #1
+  ret i32 0
+}
+
+declare ptr @_Znam(i64)
+
+attributes #0 = { noinline optnone }
+attributes #1 = { "memprof"="cold" }
+
+!0 = !{!1, !3}
+!1 = !{!2, !"notcold"}
+!2 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
+!3 = !{!4, !"cold"}
+!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
+!5 = !{i64 9086428284934609951}
Index: llvm/test/ThinLTO/X86/memprof-inlined2.ll
===================================================================
--- llvm/test/ThinLTO/X86/memprof-inlined2.ll
+++ llvm/test/ThinLTO/X86/memprof-inlined2.ll
@@ -44,6 +44,7 @@
 
 ; RUN: opt -thinlto-bc %s >%t.o
 ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN:  -supports-hot-cold-new \
 ; RUN:  -r=%t.o,main,plx \
 ; RUN:  -r=%t.o,_Z3barv,plx \
 ; RUN:  -r=%t.o,_Z3bazv,plx \
Index: llvm/test/ThinLTO/X86/memprof-inlined.ll
===================================================================
--- llvm/test/ThinLTO/X86/memprof-inlined.ll
+++ llvm/test/ThinLTO/X86/memprof-inlined.ll
@@ -43,6 +43,7 @@
 
 ; RUN: opt -thinlto-bc %s >%t.o
 ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN:  -supports-hot-cold-new \
 ; RUN:	-r=%t.o,main,plx \
 ; RUN:	-r=%t.o,_ZdaPv, \
 ; RUN:	-r=%t.o,sleep, \
@@ -64,6 +65,7 @@
 
 ;; Try again but with distributed ThinLTO
 ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN:  -supports-hot-cold-new \
 ; RUN:  -thinlto-distributed-indexes \
 ; RUN:  -r=%t.o,main,plx \
 ; RUN:  -r=%t.o,_ZdaPv, \
Index: llvm/test/ThinLTO/X86/memprof-indirectcall.ll
===================================================================
--- llvm/test/ThinLTO/X86/memprof-indirectcall.ll
+++ llvm/test/ThinLTO/X86/memprof-indirectcall.ll
@@ -53,6 +53,7 @@
 
 ; RUN: opt -thinlto-bc %s >%t.o
 ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN:  -supports-hot-cold-new \
 ; RUN:  -r=%t.o,main,plx \
 ; RUN:  -r=%t.o,sleep, \
 ; RUN:  -r=%t.o,_Znam, \
@@ -75,6 +76,7 @@
 
 ;; Try again but with distributed ThinLTO
 ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN:  -supports-hot-cold-new \
 ; RUN:  -thinlto-distributed-indexes \
 ; RUN:  -r=%t.o,main,plx \
 ; RUN:  -r=%t.o,_ZdaPv, \
Index: llvm/test/ThinLTO/X86/memprof-funcassigncloning.ll
===================================================================
--- llvm/test/ThinLTO/X86/memprof-funcassigncloning.ll
+++ llvm/test/ThinLTO/X86/memprof-funcassigncloning.ll
@@ -48,6 +48,7 @@
 
 ; RUN: opt -thinlto-bc %s >%t.o
 ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN:  -supports-hot-cold-new \
 ; RUN:  -r=%t.o,main,plx \
 ; RUN:  -r=%t.o,_ZdaPv, \
 ; RUN:  -r=%t.o,sleep, \
@@ -62,6 +63,7 @@
 
 ;; Try again but with distributed ThinLTO
 ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN:  -supports-hot-cold-new \
 ; RUN:  -thinlto-distributed-indexes \
 ; RUN:  -r=%t.o,main,plx \
 ; RUN:  -r=%t.o,_ZdaPv, \
Index: llvm/test/ThinLTO/X86/memprof-duplicate-context-ids2.ll
===================================================================
--- llvm/test/ThinLTO/X86/memprof-duplicate-context-ids2.ll
+++ llvm/test/ThinLTO/X86/memprof-duplicate-context-ids2.ll
@@ -95,6 +95,7 @@
 
 ; RUN: opt -thinlto-bc %s >%t.o
 ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN:	-supports-hot-cold-new \
 ; RUN:  -r=%t.o,main,plx \
 ; RUN:  -r=%t.o,_Z1Db,plx \
 ; RUN:  -r=%t.o,_Z1Cb,plx \
Index: llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll
===================================================================
--- llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll
+++ llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll
@@ -55,6 +55,7 @@
 
 ; RUN: opt -thinlto-bc %s >%t.o
 ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN:	-supports-hot-cold-new \
 ; RUN:  -r=%t.o,main,plx \
 ; RUN:  -r=%t.o,_ZdaPv, \
 ; RUN:  -r=%t.o,sleep, \
@@ -75,6 +76,7 @@
 
 ;; Try again but with distributed ThinLTO
 ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN:	-supports-hot-cold-new \
 ; RUN:  -thinlto-distributed-indexes \
 ; RUN:  -r=%t.o,main,plx \
 ; RUN:  -r=%t.o,_ZdaPv, \
Index: llvm/test/ThinLTO/X86/memprof-basic.ll
===================================================================
--- llvm/test/ThinLTO/X86/memprof-basic.ll
+++ llvm/test/ThinLTO/X86/memprof-basic.ll
@@ -33,6 +33,7 @@
 
 ; RUN: opt -thinlto-bc %s >%t.o
 ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN:	-supports-hot-cold-new \
 ; RUN:	-r=%t.o,main,plx \
 ; RUN:	-r=%t.o,_ZdaPv, \
 ; RUN:	-r=%t.o,sleep, \
@@ -52,6 +53,7 @@
 
 ;; Try again but with distributed ThinLTO
 ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN:	-supports-hot-cold-new \
 ; RUN:  -thinlto-distributed-indexes \
 ; RUN:	-r=%t.o,main,plx \
 ; RUN:	-r=%t.o,_ZdaPv, \
Index: llvm/test/LTO/X86/memprof-supports-hot-cold-new.ll
===================================================================
--- /dev/null
+++ llvm/test/LTO/X86/memprof-supports-hot-cold-new.ll
@@ -0,0 +1,65 @@
+;; Test that passing -supports-hot-cold-new to the LTO link allows context
+;; disambiguation to proceed, and also prevents memprof metadata and attributes
+;; from being stripped in the LTO backend, and vice versa without passing
+;; -supports-hot-cold-new.
+
+;; Note that this tests regular LTO (with a summary) due to the module flag
+;; disabling ThinLTO.
+
+;; First check with -supports-hot-cold-new.
+; RUN: opt -module-summary %s >%t.o
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN:	-supports-hot-cold-new \
+; RUN:	-r=%t.o,main,plx \
+; RUN:	-r=%t.o,_Znam, \
+; RUN:	-memprof-dump-ccg \
+; RUN:	 -save-temps \
+; RUN:	-o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP
+; DUMP: Callsite Context Graph:
+
+; RUN: llvm-dis %t.out.0.0.preopt.bc -o - | FileCheck %s --check-prefix=IR
+; IR: !memprof {{.*}} !callsite
+; IR: "memprof"="cold"
+
+;; Next check without -supports-hot-cold-new, we should not perform
+;; context disambiguation, and we should strip memprof metadata and
+;; attributes before optimization.
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN:	-r=%t.o,main,plx \
+; RUN:	-r=%t.o,_Znam, \
+; RUN:	-memprof-dump-ccg \
+; RUN:	 -save-temps \
+; RUN:	-o %t.out 2>&1 | FileCheck %s --allow-empty \
+; RUN:  --implicit-check-not "Callsite Context Graph:"
+
+; RUN: llvm-dis %t.out.0.0.preopt.bc -o - | FileCheck %s \
+; RUN: --implicit-check-not "!memprof" --implicit-check-not "!callsite" \
+; RUN: --implicit-check-not '"memprof"="cold"'
+
+source_filename = "memprof-supports-hot-cold-new.ll"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @main() #0 {
+entry:
+  %call = call ptr @_Znam(i64 0), !memprof !0, !callsite !5
+  %call2 = call ptr @_Znam(i64 0) #1
+  ret i32 0
+}
+
+declare ptr @_Znam(i64)
+
+attributes #0 = { noinline optnone }
+attributes #1 = { "memprof"="cold" }
+
+!llvm.module.flags = !{!6}
+
+!0 = !{!1, !3}
+!1 = !{!2, !"notcold"}
+!2 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
+!3 = !{!4, !"cold"}
+!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
+!5 = !{i64 9086428284934609951}
+
+;; Force regular LTO even though we have a summary.
+!6 = !{i32 1, !"ThinLTO", i32 0}
Index: llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
===================================================================
--- llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -104,6 +104,12 @@
     cl::desc("Import summary to use for testing the ThinLTO backend via opt"),
     cl::Hidden);
 
+// Indicate we are linking with an allocator that supports hot/cold operator
+// new interfaces.
+cl::opt<bool> SupportsHotColdNew(
+    "supports-hot-cold-new", cl::init(false), cl::Hidden,
+    cl::desc("Linking with hot/cold operator new interfaces"));
+
 /// CRTP base for graphs built from either IR or ThinLTO summary index.
 ///
 /// The graph represents the call contexts in all memprof metadata on allocation
@@ -3154,6 +3160,17 @@
     return Changed;
   }
 
+  // TODO: If/when other types of memprof cloning are enabled beyond just for
+  // hot and cold, we will need to change this to individually control the
+  // AllocationType passed to addStackNodesForMIB during CCG construction.
+  // Note that we specifically check this after applying imports above, so that
+  // the option does not need to be passed to distributed ThinLTO backend
+  // clang processes, which won't necessarily have visibility into the linker
+  // dependences. Instead the information is communicated from the LTO link to
+  // the backends via the combined summary index.
+  if (!SupportsHotColdNew)
+    return Changed;
+
   ModuleCallsiteContextGraph CCG(M, OREGetter);
   Changed = CCG.process();
 
@@ -3193,6 +3210,14 @@
     ModuleSummaryIndex &Index,
     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
         isPrevailing) {
+  // TODO: If/when other types of memprof cloning are enabled beyond just for
+  // hot and cold, we will need to change this to individually control the
+  // AllocationType passed to addStackNodesForMIB during CCG construction.
+  // The index was set from the option, so these should be in sync.
+  assert(Index.withSupportsHotColdNew() == SupportsHotColdNew);
+  if (!SupportsHotColdNew)
+    return;
+
   IndexCallsiteContextGraph CCG(Index, isPrevailing);
   CCG.process();
 }
Index: llvm/lib/LTO/LTOBackend.cpp
===================================================================
--- llvm/lib/LTO/LTOBackend.cpp
+++ llvm/lib/LTO/LTOBackend.cpp
@@ -565,6 +565,8 @@
   // the module, if applicable.
   Mod.setPartialSampleProfileRatio(CombinedIndex);
 
+  updateMemProfAttributes(Mod, CombinedIndex);
+
   updatePublicTypeTestCalls(Mod, CombinedIndex.withWholeProgramVisibility());
 
   if (Conf.CodeGenOnly) {
Index: llvm/lib/LTO/LTO.cpp
===================================================================
--- llvm/lib/LTO/LTO.cpp
+++ llvm/lib/LTO/LTO.cpp
@@ -76,6 +76,10 @@
     cl::desc("Enable global value internalization in LTO"));
 }
 
+/// Indicate we are linking with an allocator that supports hot/cold operator
+/// new interfaces.
+extern cl::opt<bool> SupportsHotColdNew;
+
 /// Enable MemProf context disambiguation for thin link.
 extern cl::opt<bool> EnableMemProfContextDisambiguation;
 
@@ -1079,6 +1083,9 @@
     return StatsFileOrErr.takeError();
   std::unique_ptr<ToolOutputFile> StatsFile = std::move(StatsFileOrErr.get());
 
+  if (SupportsHotColdNew)
+    ThinLTO.CombinedIndex.setWithSupportsHotColdNew();
+
   Error Result = runRegularLTO(AddStream);
   if (!Result)
     Result = runThinLTO(AddStream, Cache, GUIDPreservedSymbols);
@@ -1089,6 +1096,37 @@
   return Result;
 }
 
+void lto::updateMemProfAttributes(Module &Mod,
+                                  const ModuleSummaryIndex &Index) {
+  if (Index.withSupportsHotColdNew())
+    return;
+
+  // The profile matcher applies hotness attributes directly for allocations,
+  // and those will cause us to generate calls to the hot/cold interfaces
+  // unconditionally. If supports-hot-cold-new was not enabled in the LTO
+  // link then assume we don't want these calls (e.g. not linking with
+  // the appropriate library, or otherwise trying to disable this behavior).
+  for (auto &F : Mod) {
+    for (auto &BB : F) {
+      for (auto &I : BB) {
+        auto *CI = dyn_cast<CallBase>(&I);
+        if (!CI)
+          continue;
+        if (CI->hasFnAttr("memprof"))
+          CI->removeFnAttr("memprof");
+        // Strip off all memprof metadata as it is no longer needed.
+        // Importantly, this avoids the addition of new memprof attributes
+        // after inlining propagation.
+        // TODO: If we support additional types of MemProf metadata beyond hot
+        // and cold, we will need to update the metadata based on the allocator
+        // APIs supported instead of completely stripping all.
+        CI->setMetadata(LLVMContext::MD_memprof, nullptr);
+        CI->setMetadata(LLVMContext::MD_callsite, nullptr);
+      }
+    }
+  }
+}
+
 Error LTO::runRegularLTO(AddStreamFn AddStream) {
   // Setup optimization remarks.
   auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks(
@@ -1142,6 +1180,8 @@
     }
   }
 
+  updateMemProfAttributes(*RegularLTO.CombinedModule, ThinLTO.CombinedIndex);
+
   // If allowed, upgrade public vcall visibility metadata to linkage unit
   // visibility before whole program devirtualization in the optimizer.
   updateVCallVisibilityInModule(*RegularLTO.CombinedModule,
Index: llvm/lib/IR/ModuleSummaryIndex.cpp
===================================================================
--- llvm/lib/IR/ModuleSummaryIndex.cpp
+++ llvm/lib/IR/ModuleSummaryIndex.cpp
@@ -107,11 +107,13 @@
     Flags |= 0x40;
   if (withWholeProgramVisibility())
     Flags |= 0x80;
+  if (withSupportsHotColdNew())
+    Flags |= 0x100;
   return Flags;
 }
 
 void ModuleSummaryIndex::setFlags(uint64_t Flags) {
-  assert(Flags <= 0xff && "Unexpected bits in flag");
+  assert(Flags <= 0x1ff && "Unexpected bits in flag");
   // 1 bit: WithGlobalValueDeadStripping flag.
   // Set on combined index only.
   if (Flags & 0x1)
@@ -145,6 +147,10 @@
   // Set on combined index only.
   if (Flags & 0x80)
     setWithWholeProgramVisibility();
+  // 1 bit: WithSupportsHotColdNew flag.
+  // Set on combined index only.
+  if (Flags & 0x100)
+    setWithSupportsHotColdNew();
 }
 
 // Collect for the given module the list of function it defines
Index: llvm/lib/Bitcode/Reader/BitcodeReader.cpp
===================================================================
--- llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -8067,7 +8067,7 @@
     case bitc::FS_FLAGS: { // [flags]
       uint64_t Flags = Record[0];
       // Scan flags.
-      assert(Flags <= 0xff && "Unexpected bits in flag");
+      assert(Flags <= 0x1ff && "Unexpected bits in flag");
 
       return Flags & 0x8;
     }
Index: llvm/include/llvm/LTO/LTO.h
===================================================================
--- llvm/include/llvm/LTO/LTO.h
+++ llvm/include/llvm/LTO/LTO.h
@@ -96,6 +96,11 @@
 /// ordered indices to elements in the input array.
 std::vector<int> generateModulesOrdering(ArrayRef<BitcodeModule *> R);
 
+/// Updates MemProf attributes (and metadata) based on whether the index
+/// has recorded that we are linking with allocation libraries containing
+/// the necessary APIs for downstream transformations.
+void updateMemProfAttributes(Module &Mod, const ModuleSummaryIndex &Index);
+
 class LTO;
 struct SymbolResolution;
 class ThinBackendProc;
Index: llvm/include/llvm/IR/ModuleSummaryIndex.h
===================================================================
--- llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -1305,6 +1305,9 @@
   /// Indicates that summary-based synthetic entry count propagation has run
   bool HasSyntheticEntryCounts = false;
 
+  /// Indicates that we linked with allocator supporting hot/cold new operators.
+  bool WithSupportsHotColdNew = false;
+
   /// Indicates that distributed backend should skip compilation of the
   /// module. Flag is suppose to be set by distributed ThinLTO indexing
   /// when it detected that the module is not needed during the final
@@ -1513,6 +1516,9 @@
   bool hasSyntheticEntryCounts() const { return HasSyntheticEntryCounts; }
   void setHasSyntheticEntryCounts() { HasSyntheticEntryCounts = true; }
 
+  bool withSupportsHotColdNew() const { return WithSupportsHotColdNew; }
+  void setWithSupportsHotColdNew() { WithSupportsHotColdNew = true; }
+
   bool skipModuleByDistributedBackend() const {
     return SkipModuleByDistributedBackend;
   }
Index: clang/test/CodeGen/thinlto-distributed-supports-hot-cold-new.ll
===================================================================
--- /dev/null
+++ clang/test/CodeGen/thinlto-distributed-supports-hot-cold-new.ll
@@ -0,0 +1,70 @@
+; REQUIRES: x86-registered-target
+
+;; Test that passing -supports-hot-cold-new to the thin link prevents memprof
+;; metadata and attributes from being stripped in the distributed ThinLTO
+;; backend, and vice versa without passing -supports-hot-cold-new.
+
+;; First check with -supports-hot-cold-new.
+; RUN: opt -thinlto-bc %s >%t.o
+; RUN: llvm-lto2 run %t.o -save-temps \
+; RUN:  -supports-hot-cold-new \
+; RUN:  -thinlto-distributed-indexes \
+; RUN:  -r=%t.o,main,plx \
+; RUN:  -r=%t.o,_Znam, \
+; RUN:  -o %t.out
+
+;; Ensure that the index file reflects the -supports-hot-cold-new, as that is
+;; how the ThinLTO backend behavior is controlled.
+; RUN: llvm-dis %t.out.index.bc -o - | FileCheck %s --check-prefix=CHECK-INDEX-ON
+;; Flags are printed in decimal, but this corresponds to 0x161, and 0x100 is
+;; the value indicating -supports-hot-cold-new was enabled.
+; CHECK-INDEX-ON: flags: 353
+
+; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t1.o -x ir %t.o -c -fthinlto-index=%t.o.thinlto.bc -save-temps=obj
+
+; RUN: llvm-dis %t.s.0.preopt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
+; CHECK-IR: !memprof {{.*}} !callsite
+; CHECK-IR: "memprof"="cold"
+
+;; Next check without -supports-hot-cold-new, we should not perform
+;; context disambiguation, and we should strip memprof metadata and
+;; attributes before optimization during the distributed backend.
+; RUN: llvm-lto2 run %t.o -save-temps \
+; RUN:  -thinlto-distributed-indexes \
+; RUN:  -r=%t.o,main,plx \
+; RUN:  -r=%t.o,_Znam, \
+; RUN:  -o %t.out
+
+;; Ensure that the index file reflects not having -supports-hot-cold-new.
+; RUN: llvm-dis %t.out.index.bc -o - | FileCheck %s --check-prefix=CHECK-INDEX-OFF
+;; Flags are printed in decimal, but this corresponds to 0x61, without 0x100 set.
+; CHECK-INDEX-OFF: flags: 97
+
+; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t1.o -x ir %t.o -c -fthinlto-index=%t.o.thinlto.bc -save-temps=obj
+
+; RUN: llvm-dis %t.s.0.preopt.bc -o - | FileCheck %s \
+; RUN: --implicit-check-not "!memprof" --implicit-check-not "!callsite" \
+; RUN: --implicit-check-not '"memprof"="cold"'
+
+source_filename = "thinlto-distributed-supports-hot-cold-new.ll"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @main() #0 {
+entry:
+  %call = call ptr @_Znam(i64 0), !memprof !0, !callsite !5
+  %call1 = call ptr @_Znam(i64 0) #1
+  ret i32 0
+}
+
+declare ptr @_Znam(i64)
+
+attributes #0 = { noinline optnone }
+attributes #1 = { "memprof"="cold" }
+
+!0 = !{!1, !3}
+!1 = !{!2, !"notcold"}
+!2 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
+!3 = !{!4, !"cold"}
+!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
+!5 = !{i64 9086428284934609951}
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to