hoy updated this revision to Diff 320844.
hoy added a comment.

Rebasing.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D93264/new/

https://reviews.llvm.org/D93264

Files:
  clang/test/CodeGen/pseudo-probe-emit.c
  llvm/include/llvm/IR/IntrinsicInst.h
  llvm/include/llvm/IR/Intrinsics.td
  llvm/include/llvm/IR/PseudoProbe.h
  llvm/include/llvm/Passes/StandardInstrumentations.h
  llvm/include/llvm/ProfileData/SampleProf.h
  llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
  llvm/lib/IR/PseudoProbe.cpp
  llvm/lib/Passes/PassBuilder.cpp
  llvm/lib/Passes/PassRegistry.def
  llvm/lib/Passes/StandardInstrumentations.cpp
  llvm/lib/Transforms/IPO/SampleProfile.cpp
  llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
  llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof
  llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll
  llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll
  llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
  llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll
  llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll
  llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll

Index: llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll
@@ -0,0 +1,77 @@
+; REQUIRES: x86_64-linux
+; RUN: opt < %s -passes='pseudo-probe,loop-unroll-full' -verify-pseudo-probe -S -o %t 2>&1 | FileCheck %s --check-prefix=VERIFY
+; RUN: FileCheck %s < %t
+
+; VERIFY: *** Pseudo Probe Verification After LoopFullUnrollPass ***
+; VERIFY: Function foo:
+; VERIFY: Probe 6	previous factor 1.00	current factor 5.00
+; VERIFY: Probe 4	previous factor 1.00	current factor 5.00
+
+declare void @foo2() nounwind
+
+define void @foo(i32 %x) {
+bb:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1)
+  %tmp = alloca [5 x i32*], align 16
+  br label %bb7.preheader
+
+bb3.loopexit:
+  %spec.select.lcssa = phi i32 [ %spec.select, %bb10 ]
+  %tmp5.not = icmp eq i32 %spec.select.lcssa, 0
+  br i1 %tmp5.not, label %bb24, label %bb7.preheader
+
+bb7.preheader:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1)
+  %tmp1.06 = phi i32 [ 5, %bb ], [ %spec.select.lcssa, %bb3.loopexit ]
+  br label %bb10
+
+bb10:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] 
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] 
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] 
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] 
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] 
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1)
+  %indvars.iv = phi i64 [ 0, %bb7.preheader ], [ %indvars.iv.next, %bb10 ]
+  %tmp1.14 = phi i32 [ %tmp1.06, %bb7.preheader ], [ %spec.select, %bb10 ]
+  %tmp13 = getelementptr inbounds [5 x i32*], [5 x i32*]* %tmp, i64 0, i64 %indvars.iv
+  %tmp14 = load i32*, i32** %tmp13, align 8
+  %tmp15.not = icmp ne i32* %tmp14, null
+  %tmp18 = sext i1 %tmp15.not to i32
+  %spec.select = add nsw i32 %tmp1.14, %tmp18
+  call void @foo2(), !dbg !12
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 5
+  br i1 %exitcond.not, label %bb3.loopexit, label %bb10, !llvm.loop !13
+
+bb24:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1)
+  ret void
+}
+
+;; A discriminator of 186646583 which is 0xb200037 in hexadecimal, stands for a direct call probe
+;; with an index of 6 and a scale of 100%.
+; CHECK: ![[#PROBE6]] = !DILocation(line: 2, column: 20, scope: ![[#SCOPE:]])
+; CHECK: ![[#SCOPE]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646583)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2)
+!1 = !DIFile(filename: "test.c", directory: "")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: false, unit: !0, retainedNodes: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !{i32 2, !"Dwarf Version", i32 4}
+!10 = !{i32 2, !"Debug Info Version", i32 3}
+!11 = !{!"clang version 3.9.0"}
+!12 = !DILocation(line: 2, column: 20, scope: !4)
+!13 = distinct !{!13, !14}
+!14 = !{!"llvm.loop.unroll.full"}
Index: llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -passes='pseudo-probe,sample-profile,jump-threading,pseudo-probe-update' -sample-profile-file=%S/Inputs/pseudo-probe-update.prof -S  | FileCheck %s
+
+declare i32 @f1()
+declare i32 @f2()
+declare void @f3()
+
+
+;; This tests that the branch in 'merge' can be cloned up into T1.
+define i32 @foo(i1 %cond, i1 %cond2) #0 {
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1)
+	br i1 %cond, label %T1, label %F1
+T1:
+; CHECK: %v1 = call i32 @f1(), !prof ![[#PROF1:]]
+	%v1 = call i32 @f1()
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1)
+;; The distribution factor -8513881372706734080 stands for 53.85%, which is from 7/(6+7).
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -8513881372706734080)
+    %cond3 = icmp eq i32 %v1, 412
+	br label %Merge
+F1:
+; CHECK: %v2 = call i32 @f2(), !prof ![[#PROF2:]]
+	%v2 = call i32 @f2()
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1)
+;; The distribution factor 8513881922462547968 stands for 46.15%, which is from 6/(6+7).
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 8513881922462547968)
+	br label %Merge
+Merge:
+
+	%A = phi i1 [%cond3, %T1], [%cond2, %F1]
+	%B = phi i32 [%v1, %T1], [%v2, %F1]
+	br i1 %A, label %T2, label %F2
+T2:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1)
+	call void @f3()
+	ret i32 %B
+F2:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -1)
+	ret i32 %B
+}
+
+; CHECK: ![[#PROF1]] = !{!"branch_weights", i32 7}
+; CHECK: ![[#PROF2]] = !{!"branch_weights", i32 6}
+
+attributes #0 = {"use-sample-profile"}
+
Index: llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll
===================================================================
--- llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll
+++ llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll
@@ -8,26 +8,26 @@
   store i32 %x, i32* %x.addr, align 4
   %0 = load i32, i32* %x.addr, align 4
   %cmp = icmp eq i32 %0, 0
-  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0)
+  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1)
   br i1 %cmp, label %if.then, label %if.else
   ; CHECK: br i1 %cmp, label %if.then, label %if.else, !prof ![[PD1:[0-9]+]]
 
 if.then:
   ; CHECK: call {{.*}}, !dbg ![[#PROBE1:]], !prof ![[PROF1:[0-9]+]]
   call void %f(i32 1)
-  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0)
+  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1)
   store i32 1, i32* %retval, align 4
   br label %return
 
 if.else:
   ; CHECK: call {{.*}}, !dbg ![[#PROBE2:]], !prof ![[PROF2:[0-9]+]]
   call void %f(i32 2)
-  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0)
+  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1)
   store i32 2, i32* %retval, align 4
   br label %return
 
 return:
-  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0)
+  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
   %1 = load i32, i32* %retval, align 4
   ret i32 %1
 }
@@ -36,14 +36,14 @@
 
 ; CHECK: ![[PD1]] = !{!"branch_weights", i32 8, i32 7}
 ; CHECK: ![[#PROBE1]] = !DILocation(line: 0, scope: ![[#SCOPE1:]])
-;; A discriminator of 119537711 which is 0x400002f in hexdecimal, stands for an indirect call probe
+;; A discriminator of 119537711 which is 0x720002f in hexadecimal, stands for an indirect call probe
 ;; with an index of 5.
-; CHECK: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 67108911)
+; CHECK: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537711)
 ; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2}
-; CHECK: ![[#PROBE2]] = !DILocation(line: 0, scope: ![[#SCOPE2:]])
-;; A discriminator of 119537719 which is 0x4000037 in hexdecimal, stands for an indirect call probe
+;; A discriminator of 119537719 which is 0x7200037 in hexadecimal, stands for an indirect call probe
 ;; with an index of 6.
-; CHECK: ![[#SCOPE2]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 67108919)
+; CHECK: ![[#PROBE2]] = !DILocation(line: 0, scope: ![[#SCOPE2:]])
+; CHECK: ![[#SCOPE2]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537719)
 ; CHECK: ![[PROF2]] = !{!"VP", i32 0, i64 6, i64 -1069303473483922844, i64 4, i64 9191153033785521275, i64 2}
 
 !llvm.module.flags = !{!9, !10}
@@ -69,6 +69,10 @@
 ;YAML-NEXT:    - NumSamples:      '13'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
 ;YAML-NEXT:    - ProbeId:         '1'
+;YAML-NEXT:    - String:          ', Factor='
+;YAML-NEXT:    - Factor:          '1.000000e+00'
+;YAML-NEXT:    - String:          ', OriginalSamples='
+;YAML-NEXT:    - OriginalSamples: '13'
 ;YAML-NEXT:    - String:          ')'
 ;YAML:  --- !Analysis
 ;YAML-NEXT:  Pass:            sample-profile
@@ -80,6 +84,10 @@
 ;YAML-NEXT:    - NumSamples:      '7'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
 ;YAML-NEXT:    - ProbeId:         '5'
+;YAML-NEXT:    - String:          ', Factor='
+;YAML-NEXT:    - Factor:          '1.000000e+00'
+;YAML-NEXT:    - String:          ', OriginalSamples='
+;YAML-NEXT:    - OriginalSamples: '7'
 ;YAML-NEXT:    - String:          ')'
 ;YAML:  --- !Analysis
 ;YAML-NEXT:  Pass:            sample-profile
@@ -91,6 +99,10 @@
 ;YAML-NEXT:    - NumSamples:      '7'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
 ;YAML-NEXT:    - ProbeId:         '2'
+;YAML-NEXT:    - String:          ', Factor='
+;YAML-NEXT:    - Factor:          '1.000000e+00'
+;YAML-NEXT:    - String:          ', OriginalSamples='
+;YAML-NEXT:    - OriginalSamples: '7'
 ;YAML-NEXT:    - String:          ')'
 ;YAML:  --- !Analysis
 ;YAML-NEXT:  Pass:            sample-profile
@@ -102,6 +114,10 @@
 ;YAML-NEXT:    - NumSamples:      '6'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
 ;YAML-NEXT:    - ProbeId:         '6'
+;YAML-NEXT:    - String:          ', Factor='
+;YAML-NEXT:    - Factor:          '1.000000e+00'
+;YAML-NEXT:    - String:          ', OriginalSamples='
+;YAML-NEXT:    - OriginalSamples: '6'
 ;YAML-NEXT:    - String:          ')'
 ;YAML:  --- !Analysis
 ;YAML-NEXT:  Pass:            sample-profile
@@ -113,6 +129,10 @@
 ;YAML-NEXT:    - NumSamples:      '6'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
 ;YAML-NEXT:    - ProbeId:         '3'
+;YAML-NEXT:    - String:          ', Factor='
+;YAML-NEXT:    - Factor:          '1.000000e+00'
+;YAML-NEXT:    - String:          ', OriginalSamples='
+;YAML-NEXT:    - OriginalSamples: '6'
 ;YAML-NEXT:    - String:          ')'
 ;YAML:  --- !Analysis
 ;YAML-NEXT:  Pass:            sample-profile
@@ -124,4 +144,8 @@
 ;YAML-NEXT:    - NumSamples:      '13'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
 ;YAML-NEXT:    - ProbeId:         '4'
+;YAML-NEXT:    - String:          ', Factor='
+;YAML-NEXT:    - Factor:          '1.000000e+00'
+;YAML-NEXT:    - String:          ', OriginalSamples='
+;YAML-NEXT:    - OriginalSamples: '13'
 ;YAML-NEXT:    - String:          ')'
Index: llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
===================================================================
--- llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
+++ llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
@@ -12,18 +12,18 @@
 
 define dso_local i32 @foo(i32 %x) #0 !dbg !12 {
 entry:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0, i64 -1)
   %add = add nsw i32 %x, 100000, !dbg !19
 ;; Check zen is fully inlined so there's no call to zen anymore.
 ;; Check code from the inlining of zen is properly annotated here.
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1)
 ; CHECK: br i1 %cmp.i, label %while.cond.i, label %while.cond2.i, !dbg ![[#]], !prof ![[PD1:[0-9]+]]
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0, i64 -1)
 ; CHECK: br i1 %cmp1.i, label %while.body.i, label %zen.exit, !dbg ![[#]], !prof ![[PD2:[0-9]+]]
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0)
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0)
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0)
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0, i64 -1)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0, i64 -1)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0, i64 -1)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0, i64 -1)
 ; CHECK-NOT: call i32 @zen
   %call = call i32 @zen(i32 %add), !dbg !20
   ret i32 %call, !dbg !21
@@ -32,36 +32,36 @@
 ; CHECK: define dso_local i32 @zen
 define dso_local i32 @zen(i32 %x) #0 !dbg !22 {
 entry:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0, i64 -1)
   %cmp = icmp sgt i32 %x, 0, !dbg !26
   br i1 %cmp, label %while.cond, label %while.cond2, !dbg !28
 
 while.cond:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0, i64 -1)
   %x.addr.0 = phi i32 [ %x, %entry ], [ %sub, %while.body ]
   %cmp1 = icmp sgt i32 %x.addr.0, 0, !dbg !29
   br i1 %cmp1, label %while.body, label %if.end, !dbg !31
 
 while.body:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0, i64 -1)
   %0 = load volatile i32, i32* @factor, align 4, !dbg !32
   %sub = sub nsw i32 %x.addr.0, %0, !dbg !39
   br label %while.cond, !dbg !31
 
 while.cond2:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0, i64 -1)
   %x.addr.1 = phi i32 [ %x, %entry ], [ %add, %while.body4 ]
   %cmp3 = icmp slt i32 %x.addr.1, 0, !dbg !42
   br i1 %cmp3, label %while.body4, label %if.end, !dbg !44
 
 while.body4:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0, i64 -1)
   %1 = load volatile i32, i32* @factor, align 4, !dbg !45
   %add = add nsw i32 %x.addr.1, %1, !dbg !48
   br label %while.cond2, !dbg !44
 
 if.end:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0, i64 -1)
   %x.addr.2 = phi i32 [ %x.addr.0, %while.cond ], [ %x.addr.1, %while.cond2 ]
   ret i32 %x.addr.2, !dbg !51
 }
@@ -109,6 +109,10 @@
 ;YAML-NEXT:    - NumSamples:      '23'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
 ;YAML-NEXT:    - ProbeId:         '1'
+;YAML-NEXT:    - String:          ', Factor='
+;YAML-NEXT:    - Factor:          '1.000000e+00'
+;YAML-NEXT:    - String:          ', OriginalSamples='
+;YAML-NEXT:    - OriginalSamples: '23'
 ;YAML-NEXT:    - String:          ')'
 ;YAML-NEXT:  ...
 ;YAML:  --- !Analysis
@@ -121,6 +125,10 @@
 ;YAML-NEXT:    - NumSamples:      '23'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
 ;YAML-NEXT:    - ProbeId:         '1'
+;YAML-NEXT:    - String:          ', Factor='
+;YAML-NEXT:    - Factor:          '1.000000e+00'
+;YAML-NEXT:    - String:          ', OriginalSamples='
+;YAML-NEXT:    - OriginalSamples: '23'
 ;YAML-NEXT:    - String:          ')'
 ;YAML-NEXT:  ...
 ;YAML:  --- !Analysis
@@ -133,6 +141,10 @@
 ;YAML-NEXT:    - NumSamples:      '382920'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
 ;YAML-NEXT:    - ProbeId:         '2'
+;YAML-NEXT:    - String:          ', Factor='
+;YAML-NEXT:    - Factor:          '1.000000e+00'
+;YAML-NEXT:    - String:          ', OriginalSamples='
+;YAML-NEXT:    - OriginalSamples: '382920'
 ;YAML-NEXT:    - String:          ')'
 ;YAML-NEXT:  ...
 
Index: llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll
===================================================================
--- llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll
+++ llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll
@@ -11,32 +11,36 @@
 
 ;; Check the generation of pseudoprobe intrinsic call.
 
+@a = dso_local global i32 0, align 4
+
 define void @foo(i32 %x) !dbg !3 {
 bb0:
   %cmp = icmp eq i32 %x, 0
-; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0), !dbg ![[#FAKELINE:]]
+; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1), !dbg ![[#FAKELINE:]]
 ; CHECK-MIR: PSEUDO_PROBE [[#GUID:]], 1, 0, 0
 ; CHECK-ASM: .pseudoprobe	[[#GUID:]] 1 0 0
   br i1 %cmp, label %bb1, label %bb2
 
 bb1:
-; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0), !dbg ![[#FAKELINE]]
+; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1), !dbg ![[#FAKELINE]]
 ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 3, 0, 0
 ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 4, 0, 0
 ; CHECK-ASM: .pseudoprobe	[[#GUID]] 3 0 0
 ; CHECK-ASM: .pseudoprobe	[[#GUID]] 4 0 0
+  store i32 6, i32* @a, align 4
   br label %bb3
 
 bb2:
-; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0), !dbg ![[#FAKELINE]]
+; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1), !dbg ![[#FAKELINE]]
 ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 2, 0, 0
 ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 4, 0, 0
 ; CHECK-ASM: .pseudoprobe	[[#GUID]] 2 0 0
 ; CHECK-ASM: .pseudoprobe	[[#GUID]] 4 0 0
+  store i32 8, i32* @a, align 4
   br label %bb3
 
 bb3:
-; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0), !dbg ![[#REALLINE:]]
+; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1), !dbg ![[#REALLINE:]]
   ret void, !dbg !12
 }
 
@@ -44,7 +48,7 @@
 
 define internal void @foo2(void (i32)* %f) !dbg !4 {
 entry:
-; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0)
+; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1)
 ; CHECK-MIR: PSEUDO_PROBE [[#GUID2:]], 1, 0, 0
 ; CHECK-ASM: .pseudoprobe	[[#GUID2:]] 1 0 0
 ; Check pseudo_probe metadata attached to the indirect call instruction.
@@ -64,13 +68,13 @@
 ; CHECK-IL: ![[#FAKELINE]] = !DILocation(line: 0, scope: ![[#FOO]])
 ; CHECK-IL: ![[#REALLINE]] = !DILocation(line: 2, scope: ![[#FOO]])
 ; CHECK-IL: ![[#PROBE0]] = !DILocation(line: 2, column: 20, scope: ![[#SCOPE0:]])
-;; A discriminator of 67108887 which is 0x4000017 in hexdecimal, stands for a direct call probe
+;; A discriminator of 119537687 which is 0x7200017 in hexadecimal, stands for an indirect call probe
 ;; with an index of 2.
-; CHECK-IL: ![[#SCOPE0]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 67108887)
+; CHECK-IL: ![[#SCOPE0]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537687)
 ; CHECK-IL: ![[#PROBE1]] = !DILocation(line: 0, scope: ![[#SCOPE1:]])
-;; A discriminator of 134217759 which is 0x800001f in hexdecimal, stands for a direct call probe
+;; A discriminator of 186646559 which is 0xb20001f in hexadecimal, stands for a direct call probe
 ;; with an index of 3.
-; CHECK-IL: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 134217759)
+; CHECK-IL: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646559)
 
 ; Check the generation of .pseudo_probe_desc section
 ; CHECK-ASM: .section .pseudo_probe_desc,"G",@progbits,.pseudo_probe_desc_foo,comdat
Index: llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll
===================================================================
--- llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll
+++ llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll
@@ -11,14 +11,14 @@
 ; RUN: llvm-objdump --section-headers  %t4 | FileCheck %s --check-prefix=CHECK-OBJ
 
 define dso_local void @foo2() !dbg !7 {
-; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0), !dbg ![[#]]
+; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0, i64 -1), !dbg ![[#]]
 ; CHECK-ASM: .pseudoprobe	[[#GUID1:]] 1 0 0
   ret void, !dbg !10
 }
 
 define dso_local void @foo() #0 !dbg !11 {
-; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0), !dbg ![[#]]
-; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0), !dbg ![[#DL1:]]
+; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1), !dbg ![[#]]
+; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0, i64 -1), !dbg ![[#DL1:]]
 ; CHECK-ASM: .pseudoprobe	[[#GUID2:]] 1 0 0
 ; CHECK-ASM: .pseudoprobe	[[#GUID1]] 1 0 0 @ [[#GUID2]]:2
   call void @foo2(), !dbg !12
@@ -26,9 +26,9 @@
 }
 
 define dso_local i32 @entry() !dbg !14 {
-; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID3:]], i64 1, i32 0), !dbg ![[#]]
-; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0), !dbg ![[#DL2:]]
-; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0), !dbg ![[#DL3:]]
+; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID3:]], i64 1, i32 0, i64 -1), !dbg ![[#]]
+; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0, i64 -1), !dbg ![[#DL2:]]
+; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0, i64 -1), !dbg ![[#DL3:]]
 ; CHECK-ASM: .pseudoprobe	[[#GUID3:]] 1 0 0
 ; CHECK-ASM: .pseudoprobe	[[#GUID2]] 1 0 0 @ [[#GUID3]]:2
 ; CHECK-ASM: .pseudoprobe	[[#GUID1]] 1 0 0 @ [[#GUID3]]:2 @ [[#GUID2]]:2
@@ -41,13 +41,13 @@
 ; CHECK-IL: ![[#SCOPE2:]] = distinct !DISubprogram(name: "foo"
 ; CHECK-IL: ![[#DL1]] = !DILocation(line: 3, column: 1,  scope: ![[#SCOPE1]], inlinedAt: ![[#INL1:]])
 ; CHECK-IL: ![[#INL1]] = distinct !DILocation(line: 7, column: 3, scope: ![[#BL1:]])
-;; A discriminator of 134217751 which is 0x8000017 in hexdecimal, stands for a direct call probe
-;; with an index of 2.
-; CHECK-IL: ![[#BL1]] = !DILexicalBlockFile(scope: ![[#SCOPE2]], file: !1, discriminator: 134217751)
+;; A discriminator of 186646551 which is 0xb200017 in hexadecimal, stands for a direct call probe
+;; with an index of 2 and a scale of 100%.
+; CHECK-IL: ![[#BL1]] = !DILexicalBlockFile(scope: ![[#SCOPE2]], file: !1, discriminator: 186646551)
 ; CHECK-IL: ![[#SCOPE3:]] = distinct !DISubprogram(name: "entry"
 ; CHECK-IL: ![[#DL2]] = !DILocation(line: 7, column: 3,  scope: ![[#SCOPE2]], inlinedAt: ![[#INL2:]])
 ; CHECK-IL: ![[#INL2]] = distinct !DILocation(line: 11, column: 3, scope: ![[#BL2:]])
-; CHECK-IL: ![[#BL2]] = !DILexicalBlockFile(scope: ![[#SCOPE3]], file: !1, discriminator: 134217751)
+; CHECK-IL: ![[#BL2]] = !DILexicalBlockFile(scope: ![[#SCOPE3]], file: !1, discriminator: 186646551)
 ; CHECK-IL: ![[#DL3]] = !DILocation(line: 3, column: 1,  scope: ![[#SCOPE1]], inlinedAt: ![[#INL3:]])
 ; CHECK-IL: ![[#INL3]] = distinct !DILocation(line: 7, column: 3,  scope: ![[#BL1]], inlinedAt: ![[#INL2]])
 
Index: llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof
@@ -0,0 +1,8 @@
+foo:3200:13
+ 1: 13
+ 2: 7
+ 3: 6
+ 4: 13
+ 5: 7
+ 6: 6
+ !CFGChecksum: 844530426352218
Index: llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
===================================================================
--- llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
+++ llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -12,6 +12,7 @@
 
 #include "llvm/Transforms/IPO/SampleProfileProbe.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
@@ -25,8 +26,10 @@
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/ProfileData/SampleProf.h"
 #include "llvm/Support/CRC.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <unordered_set>
 #include <vector>
 
 using namespace llvm;
@@ -35,6 +38,115 @@
 STATISTIC(ArtificialDbgLine,
           "Number of probes that have an artificial debug line");
 
+static cl::opt<bool>
+    VerifyPseudoProbe("verify-pseudo-probe", cl::init(false), cl::Hidden,
+                      cl::desc("Do pseudo probe verification"));
+
+static cl::list<std::string> VerifyPseudoProbeFuncList(
+    "verify-pseudo-probe-funcs", cl::Hidden,
+    cl::desc("The option to specify the name of the functions to verify."));
+
+static cl::opt<bool>
+    UpdatePseudoProbe("update-pseudo-probe", cl::init(true), cl::Hidden,
+                      cl::desc("Update pseudo probe distribution factor"));
+
+bool PseudoProbeVerifier::shouldVerifyFunction(const Function *F) {
+  // Skip declarations; only function definitions are verified.
+  if (F->isDeclaration())
+    return false;
+  // Skip available_externally functions, which will not be emitted into the
+  // object file. The prevailing definition will be verified instead.
+  if (F->hasAvailableExternallyLinkage())
+    return false;
+  // Match against -verify-pseudo-probe-funcs; an empty list matches all.
+  static std::unordered_set<std::string> VerifyFuncNames(
+      VerifyPseudoProbeFuncList.begin(), VerifyPseudoProbeFuncList.end());
+  return VerifyFuncNames.empty() || VerifyFuncNames.count(F->getName().str());
+}
+
+void PseudoProbeVerifier::registerCallbacks(PassInstrumentationCallbacks &PIC) {
+  if (VerifyPseudoProbe) {
+    PIC.registerAfterPassCallback(
+        [this](StringRef P, Any IR, const PreservedAnalyses &) {
+          this->runAfterPass(P, IR);
+        });
+  }
+}
+
+// New-PM after-pass callback: print a banner, then verify the given IR unit.
+void PseudoProbeVerifier::runAfterPass(StringRef PassID, Any IR) {
+  std::string Banner =
+      "\n*** Pseudo Probe Verification After " + PassID.str() + " ***\n";
+  dbgs() << Banner;
+  if (any_isa<const Module *>(IR))
+    runAfterPass(any_cast<const Module *>(IR));
+  else if (any_isa<const Function *>(IR))
+    runAfterPass(any_cast<const Function *>(IR));
+  else if (any_isa<const LazyCallGraph::SCC *>(IR))
+    runAfterPass(any_cast<const LazyCallGraph::SCC *>(IR));
+  else if (any_isa<const Loop *>(IR))
+    runAfterPass(any_cast<const Loop *>(IR));
+  else
+    llvm_unreachable("Unknown IR unit");
+}
+
+void PseudoProbeVerifier::runAfterPass(const Module *M) {
+  for (const Function &F : *M)
+    runAfterPass(&F);
+}
+
+void PseudoProbeVerifier::runAfterPass(const LazyCallGraph::SCC *C) {
+  for (const LazyCallGraph::Node &N : *C)
+    runAfterPass(&N.getFunction());
+}
+
+void PseudoProbeVerifier::runAfterPass(const Function *F) {
+  if (!shouldVerifyFunction(F))
+    return;
+  ProbeFactorMap ProbeFactors;
+  for (const auto &BB : *F)
+    collectProbeFactors(&BB, ProbeFactors);
+  verifyProbeFactors(F, ProbeFactors);
+}
+
+void PseudoProbeVerifier::runAfterPass(const Loop *L) {
+  const Function *F = L->getHeader()->getParent();
+  runAfterPass(F);
+}
+
+void PseudoProbeVerifier::collectProbeFactors(const BasicBlock *Block,
+                                              ProbeFactorMap &ProbeFactors) {
+  for (const auto &I : *Block) {
+    if (Optional<PseudoProbe> Probe = extractProbe(I))
+      ProbeFactors[Probe->Id] += Probe->Factor;
+  }
+}
+
+void PseudoProbeVerifier::verifyProbeFactors(
+    const Function *F, const ProbeFactorMap &ProbeFactors) {
+  bool BannerPrinted = false;
+  auto &PrevProbeFactors = FunctionProbeFactors[F->getName()];
+  for (const auto &I : ProbeFactors) {
+    float CurProbeFactor = I.second;
+    if (PrevProbeFactors.count(I.first)) {
+      float PrevProbeFactor = PrevProbeFactors[I.first];
+      if (std::abs(CurProbeFactor - PrevProbeFactor) >
+          DistributionFactorVariance) {
+        if (!BannerPrinted) {
+          dbgs() << "Function " << F->getName() << ":\n";
+          BannerPrinted = true;
+        }
+        dbgs() << "Probe " << I.first << "\tprevious factor "
+               << format("%0.2f", PrevProbeFactor) << "\tcurrent factor "
+               << format("%0.2f", CurProbeFactor) << "\n";
+      }
+    }
+
+    // Record the current factor as the baseline for the next verification.
+    PrevProbeFactors[I.first] = I.second;
+  }
+}
+
 PseudoProbeManager::PseudoProbeManager(const Module &M) {
   if (NamedMDNode *FuncInfo = M.getNamedMetadata(PseudoProbeDescMetadataName)) {
     for (const auto *Operand : FuncInfo->operands()) {
@@ -201,7 +313,8 @@
     Function *ProbeFn =
         llvm::Intrinsic::getDeclaration(M, Intrinsic::pseudoprobe);
     Value *Args[] = {Builder.getInt64(Guid), Builder.getInt64(Index),
-                     Builder.getInt32(0)};
+                     Builder.getInt32(0),
+                     Builder.getInt64(PseudoProbeFullDistributionFactor)};
     auto *Probe = Builder.CreateCall(ProbeFn, Args);
     AssignDebugLoc(Probe);
   }
@@ -219,7 +332,8 @@
     // Levarge the 32-bit discriminator field of debug data to store the ID and
     // type of a callsite probe. This gets rid of the dependency on plumbing a
     // customized metadata through the codegen pipeline.
-    uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(Index, Type);
+    uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(
+        Index, Type, 0, PseudoProbeDwarfDiscriminator::FullDistributionFactor);
     if (auto DIL = Call->getDebugLoc()) {
       DIL = DIL->cloneWithDiscriminator(V);
       Call->setDebugLoc(DIL);
@@ -274,3 +388,47 @@
 
   return PreservedAnalyses::none();
 }
+
+void PseudoProbeUpdatePass::runOnFunction(Function &F,
+                                          FunctionAnalysisManager &FAM) {
+  BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
+  auto BBProfileCount = [&BFI](BasicBlock *BB) {
+    return BFI.getBlockProfileCount(BB)
+               ? BFI.getBlockProfileCount(BB).getValue()
+               : 0;
+  };
+
+  // Per probe id, sum the profile counts of every block holding that probe.
+  ProbeFactorMap ProbeFactors;
+  for (auto &Block : F) {
+    for (auto &I : Block) {
+      if (Optional<PseudoProbe> Probe = extractProbe(I))
+        ProbeFactors[Probe->Id] += BBProfileCount(&Block);
+    }
+  }
+
+  // Set each probe's factor to its block count over that sum (skip if zero).
+  for (auto &Block : F) {
+    for (auto &I : Block) {
+      if (Optional<PseudoProbe> Probe = extractProbe(I)) {
+        float Sum = ProbeFactors[Probe->Id];
+        if (Sum != 0)
+          setProbeDistributionFactor(I, BBProfileCount(&Block) / Sum);
+      }
+    }
+  }
+}
+
+PreservedAnalyses PseudoProbeUpdatePass::run(Module &M,
+                                             ModuleAnalysisManager &AM) {
+  if (UpdatePseudoProbe) {
+    for (auto &F : M) {
+      if (F.isDeclaration())
+        continue;
+      FunctionAnalysisManager &FAM =
+          AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+      runOnFunction(F, FAM);
+    }
+  }
+  return PreservedAnalyses::none();
+}
Index: llvm/lib/Transforms/IPO/SampleProfile.cpp
===================================================================
--- llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -108,6 +108,8 @@
 STATISTIC(NumMismatchedProfile,
           "Number of functions with CFG mismatched profile");
 STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile");
+STATISTIC(NumDuplicatedInlinesite,
+          "Number of inlined callsites with a partial distribution factor");
 
 STATISTIC(NumCSInlinedHitMinLimit,
           "Number of functions with FDO inline stopped due to min size limit");
@@ -358,7 +360,14 @@
 struct InlineCandidate {
   CallBase *CallInstr;
   const FunctionSamples *CalleeSamples;
+  // Prorated callsite count, which will be used to guide inlining. For example,
+  // if a callsite is duplicated in LTO prelink, then in LTO postlink the two
+  // copies will get their own distribution factors and their prorated counts
+  // will be used to decide if they should be inlined independently.
   uint64_t CallsiteCount;
+  // Call site distribution factor to prorate the profile samples for a
+  // duplicated callsite. Default value is 1.0.
+  float CallsiteDistribution;
 };
 
 // Inline candidate comparer using call site weight
@@ -418,8 +427,8 @@
   const FunctionSamples *findFunctionSamples(const Instruction &I) const;
   // Attempt to promote indirect call and also inline the promoted call
   bool tryPromoteAndInlineCandidate(
-      Function &F, InlineCandidate &Candidate, uint64_t &Sum,
-      DenseSet<Instruction *> &PromotedInsns,
+      Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
+      uint64_t &Sum, DenseSet<Instruction *> &PromotedInsns,
       SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
   bool inlineHotFunctions(Function &F,
                           DenseSet<GlobalValue::GUID> &InlinedGUIDs);
@@ -886,7 +895,7 @@
 
   const ErrorOr<uint64_t> &R = FS->findSamplesAt(Probe->Id, 0);
   if (R) {
-    uint64_t Samples = R.get();
+    uint64_t Samples = R.get() * Probe->Factor;
     bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples);
     if (FirstMark) {
       ORE->emit([&]() {
@@ -894,13 +903,17 @@
         Remark << "Applied " << ore::NV("NumSamples", Samples);
         Remark << " samples from profile (ProbeId=";
         Remark << ore::NV("ProbeId", Probe->Id);
+        Remark << ", Factor=";
+        Remark << ore::NV("Factor", Probe->Factor);
+        Remark << ", OriginalSamples=";
+        Remark << ore::NV("OriginalSamples", R.get());
         Remark << ")";
         return Remark;
       });
     }
-
     LLVM_DEBUG(dbgs() << "    " << Probe->Id << ":" << Inst
-                      << " - weight: " << R.get() << ")\n");
+                      << " - weight: " << R.get() << " - factor: "
+                      << format("%0.2f", Probe->Factor) << ")\n");
     return Samples;
   }
   return R;
@@ -1085,7 +1098,7 @@
 /// \param InlinedCallSite  Output vector for new call sites exposed after
 /// inlining.
 bool SampleProfileLoader::tryPromoteAndInlineCandidate(
-    Function &F, InlineCandidate &Candidate, uint64_t &Sum,
+    Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
     DenseSet<Instruction *> &PromotedInsns,
     SmallVector<CallBase *, 8> *InlinedCallSite) {
   const char *Reason = "Callee function not available";
@@ -1106,10 +1119,28 @@
                                   Candidate.CallsiteCount, Sum, false, ORE);
     if (DI) {
       Sum -= Candidate.CallsiteCount;
+      // Prorate the indirect callsite distribution.
+      // Do not update the promoted direct callsite distribution at this
+      // point since the original distribution combined with the callee
+      // profile will be used to prorate callsites from the callee if
+      // inlined. Once not inlined, the direct callsite distribution should
+      // be prorated so that it will reflect the real callsite counts.
+      setProbeDistributionFactor(*Candidate.CallInstr,
+                                 Candidate.CallsiteDistribution * Sum /
+                                     SumOrigin);
       PromotedInsns.insert(Candidate.CallInstr);
       Candidate.CallInstr = DI;
-      if (isa<CallInst>(DI) || isa<InvokeInst>(DI))
-        return tryInlineCandidate(Candidate, InlinedCallSite);
+      if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
+        bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
+        if (!Inlined) {
+          // Prorate the direct callsite distribution so that it reflects real
+          // callsite counts.
+          setProbeDistributionFactor(*DI, Candidate.CallsiteDistribution *
+                                              Candidate.CallsiteCount /
+                                              SumOrigin);
+        }
+        return Inlined;
+      }
     }
   } else {
     LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "
@@ -1216,11 +1247,11 @@
     }
     for (CallBase *I : CIS) {
       Function *CalledFunction = I->getCalledFunction();
-      InlineCandidate Candidate = {I,
-                                   LocalNotInlinedCallSites.count(I)
-                                       ? LocalNotInlinedCallSites[I]
-                                       : nullptr,
-                                   0 /* dummy count */};
+      InlineCandidate Candidate = {
+          I,
+          LocalNotInlinedCallSites.count(I) ? LocalNotInlinedCallSites[I]
+                                            : nullptr,
+          0 /* dummy count */, 1.0 /* dummy distribution factor */};
       // Do not inline recursive calls.
       if (CalledFunction == &F)
         continue;
@@ -1229,6 +1260,7 @@
           continue;
         uint64_t Sum;
         for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
+          uint64_t SumOrigin = Sum;
           if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
             FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
                                      PSI->getOrCompHotCountThreshold());
@@ -1237,8 +1269,9 @@
           if (!callsiteIsHot(FS, PSI))
             continue;
 
-          Candidate = {I, FS, FS->getEntrySamples()};
-          if (tryPromoteAndInlineCandidate(F, Candidate, Sum, PromotedInsns)) {
+          Candidate = {I, FS, FS->getEntrySamples(), 1.0};
+          if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
+                                           PromotedInsns)) {
             LocalNotInlinedCallSites.erase(I);
             LocalChanged = true;
           }
@@ -1343,6 +1376,23 @@
     if (ProfileIsCS)
       ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
     ++NumCSInlined;
+
+    // Prorate inlined probes for a duplicated inlining callsite which probably
+    // has a distribution less than 100%. Samples for an inlinee should be
+    // distributed among the copies of the original callsite based on each
+    // callsite's distribution factor for counts accuracy. Note that an inlined
+    // probe may come with its own distribution factor if it has been duplicated
+    // in the inlinee body. The two factors are multiplied to reflect the
+    // aggregation of duplication.
+    if (Candidate.CallsiteDistribution < 1) {
+      for (auto &I : IFI.InlinedCallSites) {
+        if (Optional<PseudoProbe> Probe = extractProbe(*I))
+          setProbeDistributionFactor(*I, Probe->Factor *
+                                             Candidate.CallsiteDistribution);
+      }
+      NumDuplicatedInlinesite++;
+    }
+
     return true;
   }
   return false;
@@ -1360,14 +1410,19 @@
   if (!CalleeSamples)
     return false;
 
+  float Factor = 1.0;
+  if (Optional<PseudoProbe> Probe = extractProbe(*CB))
+    Factor = Probe->Factor;
+
   uint64_t CallsiteCount = 0;
   ErrorOr<uint64_t> Weight = getBlockWeight(CB->getParent());
   if (Weight)
     CallsiteCount = Weight.get();
   if (CalleeSamples)
-    CallsiteCount = std::max(CallsiteCount, CalleeSamples->getEntrySamples());
+    CallsiteCount = std::max(
+        CallsiteCount, uint64_t(CalleeSamples->getEntrySamples() * Factor));
 
-  *NewCandidate = {CB, CalleeSamples, CallsiteCount};
+  *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
   return true;
 }
 
@@ -1479,6 +1534,7 @@
       uint64_t Sum;
       auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum);
       uint64_t SumOrigin = Sum;
+      Sum *= Candidate.CallsiteDistribution;
       for (const auto *FS : CalleeSamples) {
         // TODO: Consider disable pre-lTO ICP for MonoLTO as well
         if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
@@ -1486,7 +1542,8 @@
                                    PSI->getOrCompHotCountThreshold());
           continue;
         }
-        uint64_t EntryCountDistributed = FS->getEntrySamples();
+        uint64_t EntryCountDistributed =
+            FS->getEntrySamples() * Candidate.CallsiteDistribution;
         // In addition to regular inline cost check, we also need to make sure
         // ICP isn't introducing excessive speculative checks even if individual
         // target looks beneficial to promote and inline. That means we should
@@ -1505,9 +1562,10 @@
         SmallVector<CallBase *, 8> InlinedCallSites;
         // Attach function profile for promoted indirect callee, and update
         // call site count for the promoted inline candidate too.
-        Candidate = {I, FS, EntryCountDistributed};
-        if (tryPromoteAndInlineCandidate(F, Candidate, Sum, PromotedInsns,
-                                         &InlinedCallSites)) {
+        Candidate = {I, FS, EntryCountDistributed,
+                     Candidate.CallsiteDistribution};
+        if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
+                                         PromotedInsns, &InlinedCallSites)) {
           for (auto *CB : InlinedCallSites) {
             if (getInlineCandidate(&NewCandidate, CB))
               CQueue.emplace(NewCandidate);
@@ -1965,6 +2023,14 @@
           auto T = FS->findCallTargetMapAt(CallSite);
           if (!T || T.get().empty())
             continue;
+          // Prorate the callsite counts to reflect what is already done to the
+          // callsite, such as ICP or callsite cloning.
+          if (FunctionSamples::ProfileIsProbeBased) {
+            if (Optional<PseudoProbe> Probe = extractProbe(I)) {
+              if (Probe->Factor < 1)
+                T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor);
+            }
+          }
           SmallVector<InstrProfValueData, 2> SortedCallTargets =
               GetSortedValueDataFromCallTargets(T.get());
           uint64_t Sum;
Index: llvm/lib/Passes/StandardInstrumentations.cpp
===================================================================
--- llvm/lib/Passes/StandardInstrumentations.cpp
+++ llvm/lib/Passes/StandardInstrumentations.cpp
@@ -882,6 +882,7 @@
   OptBisect.registerCallbacks(PIC);
   PreservedCFGChecker.registerCallbacks(PIC);
   PrintChangedIR.registerCallbacks(PIC);
+  PseudoProbeVerification.registerCallbacks(PIC);
   if (VerifyEach)
     Verify.registerCallbacks(PIC);
 }
Index: llvm/lib/Passes/PassRegistry.def
===================================================================
--- llvm/lib/Passes/PassRegistry.def
+++ llvm/lib/Passes/PassRegistry.def
@@ -119,6 +119,7 @@
 MODULE_PASS("sancov-module", ModuleSanitizerCoveragePass())
 MODULE_PASS("memprof-module", ModuleMemProfilerPass())
 MODULE_PASS("poison-checking", PoisonCheckingPass())
+MODULE_PASS("pseudo-probe-update", PseudoProbeUpdatePass())
 #undef MODULE_PASS
 
 #ifndef CGSCC_ANALYSIS
Index: llvm/lib/Passes/PassBuilder.cpp
===================================================================
--- llvm/lib/Passes/PassBuilder.cpp
+++ llvm/lib/Passes/PassBuilder.cpp
@@ -1428,6 +1428,9 @@
   // Now add the optimization pipeline.
   MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPreLink));
 
+  if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
+    MPM.addPass(PseudoProbeUpdatePass());
+
   // Emit annotation remarks.
   addAnnotationRemarksPass(MPM);
 
@@ -1482,6 +1485,9 @@
   if (PTO.Coroutines)
     MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
 
+  if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
+    MPM.addPass(PseudoProbeUpdatePass());
+
   // Emit annotation remarks.
   addAnnotationRemarksPass(MPM);
 
Index: llvm/lib/IR/PseudoProbe.cpp
===================================================================
--- llvm/lib/IR/PseudoProbe.cpp
+++ llvm/lib/IR/PseudoProbe.cpp
@@ -35,6 +35,9 @@
           PseudoProbeDwarfDiscriminator::extractProbeType(Discriminator);
       Probe.Attr =
           PseudoProbeDwarfDiscriminator::extractProbeAttributes(Discriminator);
+      Probe.Factor =
+          PseudoProbeDwarfDiscriminator::extractProbeFactor(Discriminator) /
+          (float)PseudoProbeDwarfDiscriminator::FullDistributionFactor;
       return Probe;
     }
   }
@@ -47,6 +50,8 @@
     Probe.Id = II->getIndex()->getZExtValue();
     Probe.Type = (uint32_t)PseudoProbeType::Block;
     Probe.Attr = II->getAttributes()->getZExtValue();
+    Probe.Factor = II->getFactor()->getZExtValue() /
+                   (float)PseudoProbeFullDistributionFactor;
     return Probe;
   }
 
@@ -55,4 +60,40 @@
 
   return None;
 }
+
+void setProbeDistributionFactor(Instruction &Inst, float Factor) {
+  assert(Factor >= 0 && Factor <= 1 &&
+         "Distribution factor must be in [0, 1.0]");
+  if (auto *II = dyn_cast<PseudoProbeInst>(&Inst)) {
+    IRBuilder<> Builder(&Inst);
+    uint64_t IntFactor = PseudoProbeFullDistributionFactor;
+    if (Factor < 1)
+      IntFactor *= Factor;
+    auto OrigFactor = II->getFactor()->getZExtValue();
+    if (IntFactor != OrigFactor)
+      II->replaceUsesOfWith(II->getFactor(), Builder.getInt64(IntFactor));
+  } else if (isa<CallBase>(&Inst) && !isa<IntrinsicInst>(&Inst)) {
+    if (const DebugLoc &DLoc = Inst.getDebugLoc()) {
+      const DILocation *DIL = DLoc;
+      auto Discriminator = DIL->getDiscriminator();
+      if (DILocation::isPseudoProbeDiscriminator(Discriminator)) {
+        auto Index =
+            PseudoProbeDwarfDiscriminator::extractProbeIndex(Discriminator);
+        auto Type =
+            PseudoProbeDwarfDiscriminator::extractProbeType(Discriminator);
+        auto Attr = PseudoProbeDwarfDiscriminator::extractProbeAttributes(
+            Discriminator);
+        // Round small factors to 0 to avoid over-counting.
+        uint32_t IntFactor =
+            PseudoProbeDwarfDiscriminator::FullDistributionFactor;
+        if (Factor < 1)
+          IntFactor *= Factor;
+        uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(
+            Index, Type, Attr, IntFactor);
+        DIL = DIL->cloneWithDiscriminator(V);
+        Inst.setDebugLoc(DIL);
+      }
+    }
+  }
+}
 } // namespace llvm
Index: llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
===================================================================
--- llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
+++ llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
@@ -16,6 +16,10 @@
 #define LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBE_H
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/PassInstrumentation.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/IR/PseudoProbe.h"
 #include "llvm/ProfileData/SampleProf.h"
@@ -29,6 +33,8 @@
 using namespace sampleprof;
 using BlockIdMap = std::unordered_map<BasicBlock *, uint32_t>;
 using InstructionIdMap = std::unordered_map<Instruction *, uint32_t>;
+using ProbeFactorMap = std::unordered_map<uint64_t, float>;
+using FuncProbeFactorMap = StringMap<ProbeFactorMap>;
 
 enum class PseudoProbeReservedId { Invalid = 0, Last = Invalid };
 
@@ -43,6 +49,33 @@
   uint64_t getFunctionHash() const { return FunctionHash; }
 };
 
+// A pseudo probe verifier that can be run after each IR pass to detect the
+// violation of updating probe factors. In principle, the sum of distribution
+// factor for a probe should be identical before and after a pass. For a
+// function pass, the factor sum for a probe would be typically 100%.
+class PseudoProbeVerifier {
+public:
+  void registerCallbacks(PassInstrumentationCallbacks &PIC);
+
+  // Implementation of pass instrumentation callbacks for new pass manager.
+  void runAfterPass(StringRef PassID, Any IR);
+
+private:
+  // Allow a little bias due to the rounding to integral factors.
+  constexpr static float DistributionFactorVariance = 0.02;
+  // Distribution factors from last pass.
+  FuncProbeFactorMap FunctionProbeFactors;
+
+  void collectProbeFactors(const BasicBlock *BB, ProbeFactorMap &ProbeFactors);
+  void runAfterPass(const Module *M);
+  void runAfterPass(const LazyCallGraph::SCC *C);
+  void runAfterPass(const Function *F);
+  void runAfterPass(const Loop *L);
+  bool shouldVerifyFunction(const Function *F);
+  void verifyProbeFactors(const Function *F,
+                          const ProbeFactorMap &ProbeFactors);
+};
+
 // This class serves sample counts correlation for SampleProfileLoader by
 // analyzing pseudo probes and their function descriptors injected by
 // SampleProfileProber.
@@ -102,5 +135,13 @@
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
 };
 
+class PseudoProbeUpdatePass : public PassInfoMixin<PseudoProbeUpdatePass> {
+  void runOnFunction(Function &F, FunctionAnalysisManager &FAM);
+
+public:
+  PseudoProbeUpdatePass() {}
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
 } // end namespace llvm
 #endif // LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBE_H
Index: llvm/include/llvm/ProfileData/SampleProf.h
===================================================================
--- llvm/include/llvm/ProfileData/SampleProf.h
+++ llvm/include/llvm/ProfileData/SampleProf.h
@@ -347,6 +347,16 @@
     return SortedTargets;
   }
 
+  /// Prorate call targets by a distribution factor.
+  static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets,
+                                               float DistributionFactor) {
+    CallTargetMap AdjustedTargets;
+    for (const auto &I : Targets) {
+      AdjustedTargets[I.first()] = I.second * DistributionFactor;
+    }
+    return AdjustedTargets;
+  }
+
   /// Merge the samples in \p Other into this record.
   /// Optionally scale sample counts by \p Weight.
   sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1) {
Index: llvm/include/llvm/Passes/StandardInstrumentations.h
===================================================================
--- llvm/include/llvm/Passes/StandardInstrumentations.h
+++ llvm/include/llvm/Passes/StandardInstrumentations.h
@@ -22,6 +22,7 @@
 #include "llvm/IR/PassTimingInfo.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/IPO/SampleProfileProbe.h"
 
 #include <string>
 #include <utility>
@@ -273,6 +274,7 @@
   OptBisectInstrumentation OptBisect;
   PreservedCFGCheckerInstrumentation PreservedCFGChecker;
   IRChangedPrinter PrintChangedIR;
+  PseudoProbeVerifier PseudoProbeVerification;
   VerifyInstrumentation Verify;
 
   bool VerifyEach;
Index: llvm/include/llvm/IR/PseudoProbe.h
===================================================================
--- llvm/include/llvm/IR/PseudoProbe.h
+++ llvm/include/llvm/IR/PseudoProbe.h
@@ -16,28 +16,39 @@
 #include "llvm/ADT/Optional.h"
 #include <cassert>
 #include <cstdint>
+#include <limits>
 
 namespace llvm {
 
 class Instruction;
+class BasicBlock;
 
 constexpr const char *PseudoProbeDescMetadataName = "llvm.pseudo_probe_desc";
 
 enum class PseudoProbeType { Block = 0, IndirectCall, DirectCall };
 
+// The saturated distribution factor representing 100% for block probes.
+constexpr static uint64_t PseudoProbeFullDistributionFactor =
+    std::numeric_limits<uint64_t>::max();
+
 struct PseudoProbeDwarfDiscriminator {
+public:
   // The following APIs encodes/decodes per-probe information to/from a
   // 32-bit integer which is organized as:
   //  [2:0] - 0x7, this is reserved for regular discriminator,
   //          see DWARF discriminator encoding rule
   //  [18:3] - probe id
-  //  [25:19] - reserved
+  //  [25:19] - probe distribution factor
   //  [28:26] - probe type, see PseudoProbeType
   //  [31:29] - reserved for probe attributes
-  static uint32_t packProbeData(uint32_t Index, uint32_t Type) {
+  static uint32_t packProbeData(uint32_t Index, uint32_t Type, uint32_t Flags,
+                                uint32_t Factor) {
     assert(Index <= 0xFFFF && "Probe index too big to encode, exceeding 2^16");
     assert(Type <= 0x7 && "Probe type too big to encode, exceeding 7");
-    return (Index << 3) | (Type << 26) | 0x7;
+    assert(Flags <= 0x7);
+    assert(Factor <= 100 &&
+           "Probe distribution factor too big to encode, exceeding 100");
+    return (Index << 3) | (Factor << 19) | (Type << 26) | 0x7;
   }
 
   static uint32_t extractProbeIndex(uint32_t Value) {
@@ -51,16 +62,26 @@
   static uint32_t extractProbeAttributes(uint32_t Value) {
     return (Value >> 29) & 0x7;
   }
+
+  static uint32_t extractProbeFactor(uint32_t Value) {
+    return (Value >> 19) & 0x7F;
+  }
+
+  // The saturated distribution factor representing 100% for callsites.
+  constexpr static uint8_t FullDistributionFactor = 100;
 };
 
 struct PseudoProbe {
   uint32_t Id;
   uint32_t Type;
   uint32_t Attr;
+  float Factor;
 };
 
 Optional<PseudoProbe> extractProbe(const Instruction &Inst);
 
+void setProbeDistributionFactor(Instruction &Inst, float Factor);
+
 } // end namespace llvm
 
 #endif // LLVM_IR_PSEUDOPROBE_H
Index: llvm/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm/include/llvm/IR/Intrinsics.td
+++ llvm/include/llvm/IR/Intrinsics.td
@@ -1299,7 +1299,7 @@
 // Like the sideeffect intrinsic defined above, this intrinsic is treated by the 
 // optimizer as having opaque side effects so that it won't be get rid of or moved 
 // out of the block it probes.
-def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty],
+def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
                                     [IntrInaccessibleMemOnly, IntrWillReturn]>;
 
 // Intrinsics to support half precision floating point format
Index: llvm/include/llvm/IR/IntrinsicInst.h
===================================================================
--- llvm/include/llvm/IR/IntrinsicInst.h
+++ llvm/include/llvm/IR/IntrinsicInst.h
@@ -981,12 +981,16 @@
     return cast<ConstantInt>(const_cast<Value *>(getArgOperand(0)));
   }
 
+  ConstantInt *getIndex() const {
+    return cast<ConstantInt>(const_cast<Value *>(getArgOperand(1)));
+  }
+
   ConstantInt *getAttributes() const {
     return cast<ConstantInt>(const_cast<Value *>(getArgOperand(2)));
   }
 
-  ConstantInt *getIndex() const {
-    return cast<ConstantInt>(const_cast<Value *>(getArgOperand(1)));
+  ConstantInt *getFactor() const {
+    return cast<ConstantInt>(const_cast<Value *>(getArgOperand(3)));
   }
 };
 
Index: clang/test/CodeGen/pseudo-probe-emit.c
===================================================================
--- clang/test/CodeGen/pseudo-probe-emit.c
+++ clang/test/CodeGen/pseudo-probe-emit.c
@@ -6,12 +6,12 @@
 void go();
 
 void foo(int x) {
-  // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0)
+  // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1)
   if (x == 0)
-    // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 2, i32 0)
+    // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 2, i32 0, i64 -1)
     bar();
   else
-    // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 3, i32 0)
+    // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 3, i32 0, i64 -1)
     go();
-  // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0)
+  // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1)
 }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to