vsk created this revision.

Lifting from Bob Wilson's notes: The hash value that we compute and store in
PGO profile data to detect out-of-date profiles does not include enough
information. This means that many significant changes to the source will not
cause compiler warnings about the profile being out of date, and worse, we may
continue to use the outdated profile data to make bad optimization decisions.
There is some tension here because some source changes won't affect PGO and we
don't want to invalidate the profile unnecessary.

This patch adds a new version of the PGO hash which is sensitive to 'goto',
'break', 'continue', and 'return'. It also draws a distinction between 'if'
statements with/without an 'else' block.

I chose these types of statements because their presence or absence
significantly alter control flow (to the point where any existing
profile data for the function becomes suspect). I picked these types of
statements after scanning through the subclasses of Expr and Stmt --
these seemed like the most important ones to handle. If there's
something missing, it will be easy to update the hash version again.

Profiles which use the old version of the PGO hash remain valid and can
be used without issue (there are already tests in tree for this).

rdar://17068282


https://reviews.llvm.org/D39446

Files:
  lib/CodeGen/CodeGenPGO.cpp
  test/Profile/Inputs/c-counter-overflows.proftext
  test/Profile/Inputs/c-general.proftext
  test/Profile/Inputs/cxx-throws.proftext

Index: test/Profile/Inputs/cxx-throws.proftext
===================================================================
--- test/Profile/Inputs/cxx-throws.proftext
+++ test/Profile/Inputs/cxx-throws.proftext
@@ -1,5 +1,5 @@
 _Z6throwsv
-18359008150154
+18371893052042
 9
 1
 100
Index: test/Profile/Inputs/c-general.proftext
===================================================================
--- test/Profile/Inputs/c-general.proftext
+++ test/Profile/Inputs/c-general.proftext
@@ -7,7 +7,7 @@
 75
 
 conditionals
-74917022372782735
+78295546727031439
 11
 1
 100
@@ -22,7 +22,7 @@
 100
 
 early_exits
-44128811889290
+44128811890058
 9
 1
 0
Index: test/Profile/Inputs/c-counter-overflows.proftext
===================================================================
--- test/Profile/Inputs/c-counter-overflows.proftext
+++ test/Profile/Inputs/c-counter-overflows.proftext
@@ -1,5 +1,5 @@
 main
-285734896137
+298619798025
 8
 1
 68719476720
Index: lib/CodeGen/CodeGenPGO.cpp
===================================================================
--- lib/CodeGen/CodeGenPGO.cpp
+++ lib/CodeGen/CodeGenPGO.cpp
@@ -47,6 +47,15 @@
   llvm::createPGOFuncNameMetadata(*Fn, FuncName);
 }
 
+/// The version of the PGO hash algorithm.
+enum PGOHashVersion : unsigned {
+  PGO_HASH_V1,
+  PGO_HASH_V2,
+
+  // Keep this set to the latest hash version.
+  PGO_HASH_LATEST = PGO_HASH_V2
+};
+
 namespace {
 /// \brief Stable hasher for PGO region counters.
 ///
@@ -61,6 +70,7 @@
 class PGOHash {
   uint64_t Working;
   unsigned Count;
+  PGOHashVersion HashVersion;
   llvm::MD5 MD5;
 
   static const int NumBitsPerType = 6;
@@ -93,22 +103,39 @@
     BinaryOperatorLAnd,
     BinaryOperatorLOr,
     BinaryConditionalOperator,
+    // The preceding values are available with PGO_HASH_V1.
+
+    GotoStmt,
+    IndirectGotoStmt,
+    BreakStmt,
+    ContinueStmt,
+    ReturnStmt,
+    IfElseStmt,
+    // The preceding values are available with PGO_HASH_V2.
 
     // Keep this last.  It's for the static assert that follows.
     LastHashType
   };
   static_assert(LastHashType <= TooBig, "Too many types in HashType");
 
-  // TODO: When this format changes, take in a version number here, and use the
-  // old hash calculation for file formats that used the old hash.
-  PGOHash() : Working(0), Count(0) {}
+  PGOHash(PGOHashVersion HashVersion)
+      : Working(0), Count(0), HashVersion(HashVersion), MD5() {}
   void combine(HashType Type);
   uint64_t finalize();
+  PGOHashVersion getHashVersion() const { return HashVersion; }
 };
 const int PGOHash::NumBitsPerType;
 const unsigned PGOHash::NumTypesPerWord;
 const unsigned PGOHash::TooBig;
 
+/// Get the PGO hash version used in the given indexed profile.
+static PGOHashVersion getPGOHashVersion(llvm::IndexedInstrProfReader *PGOReader,
+                                        CodeGenModule &CGM) {
+  if (PGOReader->getVersion() <= 4)
+    return PGO_HASH_V1;
+  return PGO_HASH_V2;
+}
+
 /// A RecursiveASTVisitor that fills a map of statements to PGO counters.
 struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
   /// The next counter value to assign.
@@ -118,8 +145,9 @@
   /// The map of statements to counters.
   llvm::DenseMap<const Stmt *, unsigned> &CounterMap;
 
-  MapRegionCounters(llvm::DenseMap<const Stmt *, unsigned> &CounterMap)
-      : NextCounter(0), CounterMap(CounterMap) {}
+  MapRegionCounters(PGOHashVersion HashVersion,
+                    llvm::DenseMap<const Stmt *, unsigned> &CounterMap)
+      : NextCounter(0), Hash(HashVersion), CounterMap(CounterMap) {}
 
   // Blocks and lambdas are handled as separate functions, so we need not
   // traverse them in the parent context.
@@ -146,15 +174,23 @@
   }
 
   bool VisitStmt(const Stmt *S) {
-    auto Type = getHashType(S);
+    // Use V1 of the hash to determine counter mappings.
+    auto Type = getHashType(PGO_HASH_V1, S);
     if (Type == PGOHash::None)
       return true;
 
     CounterMap[S] = NextCounter++;
+
+    // If available, use a newer hash algorithm for the source hash.
+    if (Hash.getHashVersion() != PGO_HASH_V1)
+      Type = getHashType(Hash.getHashVersion(), S);
+
     Hash.combine(Type);
     return true;
   }
-  PGOHash::HashType getHashType(const Stmt *S) {
+
+  /// Get version \p HashVersion of the PGO hash for \p S.
+  PGOHash::HashType getHashType(PGOHashVersion HashVersion, const Stmt *S) {
     switch (S->getStmtClass()) {
     default:
       break;
@@ -176,8 +212,14 @@
       return PGOHash::CaseStmt;
     case Stmt::DefaultStmtClass:
       return PGOHash::DefaultStmt;
-    case Stmt::IfStmtClass:
+    case Stmt::IfStmtClass: {
+      if (HashVersion == PGO_HASH_V2) {
+        auto *If = cast<IfStmt>(S);
+        if (If->getElse())
+          return PGOHash::IfElseStmt;
+      }
       return PGOHash::IfStmt;
+    }
     case Stmt::CXXTryStmtClass:
       return PGOHash::CXXTryStmt;
     case Stmt::CXXCatchStmtClass:
@@ -195,6 +237,24 @@
       break;
     }
     }
+
+    if (HashVersion == PGO_HASH_V2) {
+      switch (S->getStmtClass()) {
+      default:
+        break;
+      case Stmt::GotoStmtClass:
+        return PGOHash::GotoStmt;
+      case Stmt::IndirectGotoStmtClass:
+        return PGOHash::IndirectGotoStmt;
+      case Stmt::BreakStmtClass:
+        return PGOHash::BreakStmt;
+      case Stmt::ContinueStmtClass:
+        return PGOHash::ContinueStmt;
+      case Stmt::ReturnStmtClass:
+        return PGOHash::ReturnStmt;
+      }
+    }
+
     return PGOHash::None;
   }
 };
@@ -653,8 +713,14 @@
 }
 
 void CodeGenPGO::mapRegionCounters(const Decl *D) {
+  // Use the latest hash version when inserting instrumentation, but use the
+  // version in the indexed profile if we're reading PGO data.
+  PGOHashVersion HashVersion = PGO_HASH_LATEST;
+  if (auto *PGOReader = CGM.getPGOReader())
+    HashVersion = getPGOHashVersion(PGOReader, CGM);
+
   RegionCounterMap.reset(new llvm::DenseMap<const Stmt *, unsigned>);
-  MapRegionCounters Walker(*RegionCounterMap);
+  MapRegionCounters Walker(HashVersion, *RegionCounterMap);
   if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
     Walker.TraverseDecl(const_cast<FunctionDecl *>(FD));
   else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to