Author: gordon
Date: Sun Jan  6 19:30:53 2008
New Revision: 45670

With this patch, the LowerGC transformation becomes the
ShadowStackCollector, which additionally has reduced overhead with
no sacrifice in portability.

Considering a function @fun with 8 loop-local roots,
ShadowStackCollector introduces the following overhead (x86):

; shadowstack prologue
        movl    L_llvm_gc_root_chain$non_lazy_ptr, %eax
        movl    (%eax), %ecx
        movl    $___gc_fun, 20(%esp)
        movl    $0, 24(%esp)
        movl    $0, 28(%esp)
        movl    $0, 32(%esp)
        movl    $0, 36(%esp)
        movl    $0, 40(%esp)
        movl    $0, 44(%esp)
        movl    $0, 48(%esp)
        movl    $0, 52(%esp)
        movl    %ecx, 16(%esp)
        leal    16(%esp), %ecx
        movl    %ecx, (%eax)

; shadowstack loop overhead
        (none)

; shadowstack epilogue
        movl    48(%esp), %edx
        movl    %edx, (%ecx)

; shadowstack metadata
        .align  3
___gc_fun:                              # __gc_fun
        .long   8
        .space  4

In comparison to LowerGC:

; lowergc prologue
        movl    L_llvm_gc_root_chain$non_lazy_ptr, %eax
        movl    (%eax), %ecx
        movl    %ecx, 48(%esp)
        movl    $8, 52(%esp)
        movl    $0, 60(%esp)
        movl    $0, 56(%esp)
        movl    $0, 68(%esp)
        movl    $0, 64(%esp)
        movl    $0, 76(%esp)
        movl    $0, 72(%esp)
        movl    $0, 84(%esp)
        movl    $0, 80(%esp)
        movl    $0, 92(%esp)
        movl    $0, 88(%esp)
        movl    $0, 100(%esp)
        movl    $0, 96(%esp)
        movl    $0, 108(%esp)
        movl    $0, 104(%esp)
        movl    $0, 116(%esp)
        movl    $0, 112(%esp)

; lowergc loop overhead
        leal    44(%esp), %eax
        movl    %eax, 56(%esp)
        leal    40(%esp), %eax
        movl    %eax, 64(%esp)
        leal    36(%esp), %eax
        movl    %eax, 72(%esp)
        leal    32(%esp), %eax
        movl    %eax, 80(%esp)
        leal    28(%esp), %eax
        movl    %eax, 88(%esp)
        leal    24(%esp), %eax
        movl    %eax, 96(%esp)
        leal    20(%esp), %eax
        movl    %eax, 104(%esp)
        leal    16(%esp), %eax
        movl    %eax, 112(%esp)

; lowergc epilogue
        movl    48(%esp), %edx
        movl    %edx, (%ecx)

; lowergc metadata
        (none)


Modified: llvm/trunk/include/llvm/CodeGen/LinkAllCodegenComponents.h

--- llvm/trunk/include/llvm/CodeGen/LinkAllCodegenComponents.h (original)
+++ llvm/trunk/include/llvm/CodeGen/LinkAllCodegenComponents.h Sun Jan  6 19:30:53 2008
@@ -17,6 +17,7 @@
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/Collectors.h"
 namespace {
   struct ForceCodegenLinking {
@@ -35,6 +36,8 @@
       (void) llvm::createSimpleRegisterCoalescer();
+      (void) llvm::createShadowStackCollector();
       (void) llvm::createBURRListDAGScheduler(NULL, NULL, NULL);
       (void) llvm::createTDRRListDAGScheduler(NULL, NULL, NULL);
       (void) llvm::createTDListDAGScheduler(NULL, NULL, NULL);

Modified: llvm/trunk/include/llvm/LinkAllPasses.h

--- llvm/trunk/include/llvm/LinkAllPasses.h (original)
+++ llvm/trunk/include/llvm/LinkAllPasses.h Sun Jan  6 19:30:53 2008
@@ -84,7 +84,6 @@
       (void) llvm::createLoopRotatePass();
       (void) llvm::createLoopIndexSplitPass();
       (void) llvm::createLowerAllocationsPass();
-      (void) llvm::createLowerGCPass();
       (void) llvm::createLowerInvokePass();
       (void) llvm::createLowerPackedPass();
       (void) llvm::createLowerSelectPass();

Modified: llvm/trunk/include/llvm/Transforms/Scalar.h

--- llvm/trunk/include/llvm/Transforms/Scalar.h (original)
+++ llvm/trunk/include/llvm/Transforms/Scalar.h Sun Jan  6 19:30:53 2008
@@ -296,13 +296,6 @@
-// LowerGCPass - This function returns an instance of the "lowergc" pass, which
-// lowers garbage collection intrinsics to normal LLVM code.
-FunctionPass *createLowerGCPass();
 // BlockPlacement - This pass reorders basic blocks in order to increase the
 // number of fall-through conditional branches.

Added: llvm/trunk/lib/CodeGen/ShadowStackCollector.cpp

--- llvm/trunk/lib/CodeGen/ShadowStackCollector.cpp (added)
+++ llvm/trunk/lib/CodeGen/ShadowStackCollector.cpp Sun Jan  6 19:30:53 2008
@@ -0,0 +1,441 @@
+//===-- ShadowStackCollector.cpp - GC support for uncooperative targets ---===//
+//                     The LLVM Compiler Infrastructure
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// This file implements lowering for the llvm.gc* intrinsics for targets that do
+// not natively support them (which includes the C backend). Note that the code
+// generated is not quite as efficient as collectors which generate stack maps
+// to identify roots.
+// This pass implements the code transformation described in this paper:
+//   "Accurate Garbage Collection in an Uncooperative Environment"
+//   Fergus Henderson, ISMM, 2002
+// In runtime/GC/SemiSpace.cpp is a prototype runtime which is compatible with
+// this collector.
+// In order to support this particular transformation, all stack roots are
+// coallocated in the stack. This allows a fully target-independent stack map
+// while introducing only minor runtime overhead.
+#define DEBUG_TYPE "shadowstackgc"
+#include "llvm/CodeGen/Collectors.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/Collector.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/LLVMBuilder.h"
+#include "llvm/Analysis/Verifier.h"
+#include <cstdlib>
+using namespace llvm;
+namespace {
+  class VISIBILITY_HIDDEN ShadowStackCollector : public Collector {
+    /// Head - This is the global linked-list that contains the chain of
+    /// roots.
+    GlobalVariable *Head;
+    /// StackEntryTy - Abstract type of a link in the shadow stack.
+    /// 
+    const StructType *StackEntryTy;
+    /// Roots - GC roots in the current function. Each is a pair of the
+    /// intrinsic call and its corresponding alloca.
+    std::vector<std::pair<CallInst*,AllocaInst*> > Roots;
+  public:
+    ShadowStackCollector();
+    bool initializeCustomLowering(Module &M);
+    bool performCustomLowering(Function &F);
+  private:
+    bool IsNullValue(Value *V);
+    Constant *GetFrameMap(Function &F);
+    const Type* GetConcreteStackEntryType(Function &F);
+    void CollectRoots(Function &F);
+    static GetElementPtrInst *CreateGEP(LLVMBuilder &B, Value *BasePtr,
+                                        int Idx1, const char *Name);
+    static GetElementPtrInst *CreateGEP(LLVMBuilder &B, Value *BasePtr,
+                                        int Idx1, int Idx2, const char *Name);
+  };
+  CollectorRegistry::Add<ShadowStackCollector>
+  Y("shadow-stack",
+    "Very portable collector for uncooperative code generators");
+  /// EscapeEnumerator - This is a little algorithm to find all escape points
+  /// from a function so that "finally"-style code can be inserted. In addition
+  /// to finding the existing return and unwind instructions, it also (if
+  /// necessary) transforms any call instructions into invokes and sends them to
+  /// a landing pad.
+  /// 
+  /// It's wrapped up in a state machine using the same transform C# uses for
+  /// 'yield return' enumerators. This transform allows it to be non-allocating.
+  class VISIBILITY_HIDDEN EscapeEnumerator {
+    Function &F;
+    const char *CleanupBBName;
+    // State.
+    int State;
+    Function::iterator StateBB, StateE;
+    LLVMBuilder Builder;
+  public:
+    EscapeEnumerator(Function &F, const char *N = "cleanup")
+      : F(F), CleanupBBName(N), State(0) {}
+    LLVMBuilder *Next() {
+      switch (State) {
+      default:
+        return 0;
+      case 0:
+        StateBB = F.begin();
+        StateE = F.end();
+        State = 1;
+      case 1:
+        // Find all 'return' and 'unwind' instructions.
+        while (StateBB != StateE) {
+          BasicBlock *CurBB = StateBB++;
+          // Branches and invokes do not escape, only unwind and return do.
+          TerminatorInst *TI = CurBB->getTerminator();
+          if (!isa<UnwindInst>(TI) && !isa<ReturnInst>(TI))
+            continue;
+          Builder.SetInsertPoint(TI->getParent(), TI);
+          return &Builder;
+        }
+        State = 2;
+        // Find all 'call' instructions.
+        SmallVector<Instruction*,16> Calls;
+        for (Function::iterator BB = F.begin(),
+                                E = F.end(); BB != E; ++BB)
+          for (BasicBlock::iterator II = BB->begin(),
+                                    EE = BB->end(); II != EE; ++II)
+            if (CallInst *CI = dyn_cast<CallInst>(II))
+              if (!CI->getCalledFunction() ||
+                  !CI->getCalledFunction()->getIntrinsicID())
+                Calls.push_back(CI);
+        if (Calls.empty())
+          return 0;
+        // Create a cleanup block.
+        BasicBlock *CleanupBB = new BasicBlock(CleanupBBName, &F);
+        UnwindInst *UI = new UnwindInst(CleanupBB);
+        // Transform the 'call' instructions into 'invoke's branching to the
+        // cleanup block. Go in reverse order to make prettier BB names.
+        SmallVector<Value*,16> Args;
+        for (unsigned I = Calls.size(); I != 0; ) {
+          CallInst *CI = cast<CallInst>(Calls[--I]);
+          // Split the basic block containing the function call.
+          BasicBlock *CallBB = CI->getParent();
+          BasicBlock *NewBB =
+            CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont");
+          // Remove the unconditional branch inserted at the end of CallBB.
+          CallBB->getInstList().pop_back();
+          NewBB->getInstList().remove(CI);
+          // Create a new invoke instruction.
+          Args.clear();
+          Args.append(CI->op_begin() + 1, CI->op_end());
+          InvokeInst *II = new InvokeInst(CI->getOperand(0),
+                                          NewBB, CleanupBB,
+                                          Args.begin(), Args.end(),
+                                          CI->getName(), CallBB);
+          II->setCallingConv(CI->getCallingConv());
+          II->setParamAttrs(CI->getParamAttrs());
+          CI->replaceAllUsesWith(II);
+          delete CI;
+        }
+        Builder.SetInsertPoint(UI->getParent(), UI);
+        return &Builder;
+      }
+    }
+  };
+Collector *llvm::createShadowStackCollector() {
+  return new ShadowStackCollector();
+ShadowStackCollector::ShadowStackCollector() : Head(0), StackEntryTy(0) {
+  InitRoots = true;
+  CustomRoots = true;
+Constant *ShadowStackCollector::GetFrameMap(Function &F) {
+  // doInitialization creates the abstract type of this value.
+  Type *VoidPtr = PointerType::getUnqual(Type::Int8Ty);
+  // Truncate the ShadowStackDescriptor if some metadata is null.
+  unsigned NumMeta = 0;
+  SmallVector<Constant*,16> Metadata;
+  for (unsigned I = 0; I != Roots.size(); ++I) {
+    Constant *C = cast<Constant>(Roots[I].first->getOperand(2));
+    if (!C->isNullValue())
+      NumMeta = I + 1;
+    Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr));
+  }
+  Constant *BaseElts[] = {
+    ConstantInt::get(Type::Int32Ty, Roots.size(), false),
+    ConstantInt::get(Type::Int32Ty, NumMeta, false),
+  };
+  Constant *DescriptorElts[] = {
+    ConstantStruct::get(BaseElts, 2),
+    ConstantArray::get(ArrayType::get(VoidPtr, NumMeta),
+                       Metadata.begin(), NumMeta)
+  };
+  Constant *FrameMap = ConstantStruct::get(DescriptorElts, 2);
+  std::string TypeName("gc_map.");
+  TypeName += utostr(NumMeta);
+  F.getParent()->addTypeName(TypeName, FrameMap->getType());
+  // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems
+  //        that, short of multithreaded LLVM, it should be safe; all that is
+  //        necessary is that a simple Module::iterator loop not be invalidated.
+  //        Appending to the GlobalVariable list is safe in that sense.
+  // 
+  //        All of the output passes emit globals last. The ExecutionEngine
+  //        explicitly supports adding globals to the module after
+  //        initialization.
+  // 
+  //        Still, if it isn't deemed acceptable, then this transformation needs
+  //        to be a ModulePass (which means it cannot be in the 'llc' pipeline
+  //        (which uses a FunctionPassManager (which segfaults (not asserts) if
+  //        provided a ModulePass))).
+  Constant *GV = new GlobalVariable(FrameMap->getType(), true,
+                                    GlobalVariable::InternalLinkage,
+                                    FrameMap, "__gc_" + F.getName(),
+                                    F.getParent());
+  Constant *GEPIndices[2] = { ConstantInt::get(Type::Int32Ty, 0),
+                              ConstantInt::get(Type::Int32Ty, 0) };
+  return ConstantExpr::getGetElementPtr(GV, GEPIndices, 2);
+const Type* ShadowStackCollector::GetConcreteStackEntryType(Function &F) {
+  // doInitialization creates the generic version of this type.
+  std::vector<const Type*> EltTys;
+  EltTys.push_back(StackEntryTy);
+  for (size_t I = 0; I != Roots.size(); I++)
+    EltTys.push_back(Roots[I].second->getAllocatedType());
+  Type *Ty = StructType::get(EltTys);
+  std::string TypeName("gc_stackentry.");
+  TypeName += F.getName();
+  F.getParent()->addTypeName(TypeName, Ty);
+  return Ty;
+/// initializeCustomLowering - Build the FrameMap and StackEntry types and get
+/// or create the llvm_gc_root_chain global that heads the shadow stack.
+bool ShadowStackCollector::initializeCustomLowering(Module &M) {
+  // struct FrameMap {
+  //   int32_t NumRoots; // Number of roots in stack frame.
+  //   int32_t NumMeta;  // Number of metadata descriptors. May be < NumRoots.
+  //   void *Meta[];     // May be absent for roots without metadata.
+  // };
+  std::vector<const Type*> EltTys;
+  EltTys.push_back(Type::Int32Ty); // 32 bits is ok up to a 32GB stack frame. 
+  EltTys.push_back(Type::Int32Ty); // Specifies length of variable length array.
+  StructType *FrameMapTy = StructType::get(EltTys);
+  M.addTypeName("gc_map", FrameMapTy);
+  PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy);
+  // struct StackEntry {
+  //   StackEntry *Next;       // Caller's stack entry.
+  //   FrameMap *Map;          // Pointer to constant FrameMap.
+  //   void *Roots[];          // Stack roots (in-place array, so we pretend).
+  // };
+  OpaqueType *RecursiveTy = OpaqueType::get();
+  EltTys.clear();
+  EltTys.push_back(PointerType::getUnqual(RecursiveTy));
+  EltTys.push_back(FrameMapPtrTy);
+  PATypeHolder LinkTyH = StructType::get(EltTys);
+  RecursiveTy->refineAbstractTypeTo(LinkTyH.get());
+  StackEntryTy = cast<StructType>(LinkTyH.get());
+  const PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy);
+  M.addTypeName("gc_stackentry", LinkTyH.get());  // FIXME: Is this safe from
+                                                  //        a FunctionPass?
+  // Get the root chain if it already exists.
+  Head = M.getGlobalVariable("llvm_gc_root_chain");
+  if (!Head) {
+    // If the root chain does not exist, insert a new one with linkonce
+    // linkage!
+    Head = new GlobalVariable(StackEntryPtrTy, false,
+                              GlobalValue::LinkOnceLinkage,
+                              Constant::getNullValue(StackEntryPtrTy),
+                              "llvm_gc_root_chain", &M);
+  } else if (Head->hasExternalLinkage() && Head->isDeclaration()) {
+    Head->setInitializer(Constant::getNullValue(StackEntryPtrTy));
+    Head->setLinkage(GlobalValue::LinkOnceLinkage);
+  }
+  return true;
+bool ShadowStackCollector::IsNullValue(Value *V) {
+  if (Constant *C = dyn_cast<Constant>(V))
+    return C->isNullValue();
+  return false;
+void ShadowStackCollector::CollectRoots(Function &F) {
+  // FIXME: Account for original alignment. Could fragment the root array.
+  //   Approach 1: Null initialize empty slots at runtime. Yuck.
+  //   Approach 2: Emit a map of the array instead of just a count.
+  assert(Roots.empty() && "Not cleaned up?");
+  SmallVector<std::pair<CallInst*,AllocaInst*>,16> MetaRoots;
+  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;)
+      if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
+        if (Function *F = CI->getCalledFunction())
+          if (F->getIntrinsicID() == Intrinsic::gcroot) {
+            std::pair<CallInst*,AllocaInst*> Pair = std::make_pair(
+              CI, cast<AllocaInst>(
+                    IntrinsicInst::StripPointerCasts(CI->getOperand(1))));
+            if (IsNullValue(CI->getOperand(2)))
+              Roots.push_back(Pair);
+            else
+              MetaRoots.push_back(Pair);
+          }
+  // Number roots with metadata (usually empty) at the beginning, so that the
+  // FrameMap::Meta array can be elided.
+  Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end());
+GetElementPtrInst *
+ShadowStackCollector::CreateGEP(LLVMBuilder &B, Value *BasePtr,
+                                int Idx, int Idx2, const char *Name) {
+  Value *Indices[] = { ConstantInt::get(Type::Int32Ty, 0),
+                       ConstantInt::get(Type::Int32Ty, Idx),
+                       ConstantInt::get(Type::Int32Ty, Idx2) };
+  return B.CreateGEP(BasePtr, Indices, Indices + 3, Name);
+GetElementPtrInst *
+ShadowStackCollector::CreateGEP(LLVMBuilder &B, Value *BasePtr,
+                                int Idx, const char *Name) {
+  Value *Indices[] = { ConstantInt::get(Type::Int32Ty, 0),
+                       ConstantInt::get(Type::Int32Ty, Idx) };
+  return B.CreateGEP(BasePtr, Indices, Indices + 2, Name);
+/// performCustomLowering - Insert code to maintain the shadow stack.
+bool ShadowStackCollector::performCustomLowering(Function &F) {
+  // Find calls to llvm.gcroot.
+  CollectRoots(F);
+  // If there are no roots in this function, then there is no need to add a
+  // stack map entry for it.
+  if (Roots.empty())
+    return false;
+  // Build the constant map and figure the type of the shadow stack entry.
+  Value *FrameMap = GetFrameMap(F);
+  const Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F);
+  // Build the shadow stack entry at the very start of the function.
+  BasicBlock::iterator IP = F.getEntryBlock().begin();
+  LLVMBuilder AtEntry(IP->getParent(), IP);
+  Instruction *StackEntry   = AtEntry.CreateAlloca(ConcreteStackEntryTy, 0,
+                                                   "gc_frame");
+  while (isa<AllocaInst>(IP)) ++IP;
+  AtEntry.SetInsertPoint(IP->getParent(), IP);
+  // Initialize the map pointer and load the current head of the shadow stack.
+  Instruction *CurrentHead  = AtEntry.CreateLoad(Head, "gc_currhead");
+  Instruction *EntryMapPtr  = CreateGEP(AtEntry, 
+                              AtEntry.CreateStore(FrameMap, EntryMapPtr);
+  // After all the allocas...
+  for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
+    // For each root, find the corresponding slot in the aggregate...
+    Value *SlotPtr = CreateGEP(AtEntry, StackEntry, 1 + I, "gc_root");
+    // And use it in lieu of the alloca.
+    AllocaInst *OriginalAlloca = Roots[I].second;
+    SlotPtr->takeName(OriginalAlloca);
+    OriginalAlloca->replaceAllUsesWith(SlotPtr);
+  }
+  // Move past the original stores inserted by Collector::InitRoots. This isn't
+  // really necessary (the collector would never see the intermediate state),
+  // but it's nicer not to push the half-initialized entry onto the stack.
+  while (isa<StoreInst>(IP)) ++IP;
+  AtEntry.SetInsertPoint(IP->getParent(), IP);
+  // Push the entry onto the shadow stack.
+  Instruction *EntryNextPtr = 
+  Instruction *NewHeadVal   = CreateGEP(AtEntry,StackEntry, 0, "gc_newhead");
+                              AtEntry.CreateStore(CurrentHead, EntryNextPtr);
+                              AtEntry.CreateStore(NewHeadVal, Head);
+  // For each instruction that escapes...
+  EscapeEnumerator EE(F, "gc_cleanup");
+  while (LLVMBuilder *AtExit = EE.Next()) {
+    // Pop the entry from the shadow stack. Don't reuse CurrentHead from
+    // AtEntry, since that would make the value live for the entire function.
+    Instruction *EntryNextPtr2 = CreateGEP(*AtExit, StackEntry, 0, 0,
+                                           "");
+    Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead");
+                       AtExit->CreateStore(SavedHead, Head);
+  }
+  // Delete the original allocas (which are no longer used) and the intrinsic
+  // calls (which are no longer valid). Doing this last avoids invalidating
+  // iterators.
+  for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
+    Roots[I].first->eraseFromParent();
+    Roots[I].second->eraseFromParent();
+  }
+  F.dump();
+  Roots.clear();
+  return true;

Modified: llvm/trunk/lib/Transforms/Scalar/LowerGC.cpp

--- llvm/trunk/lib/Transforms/Scalar/LowerGC.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LowerGC.cpp Sun Jan  6 19:30:53 2008
@@ -1,350 +0,0 @@
-//===-- LowerGC.cpp - Provide GC support for targets that don't -----------===//
-//                     The LLVM Compiler Infrastructure
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-// This file implements lowering for the llvm.gc* intrinsics for targets that do
-// not natively support them (which includes the C backend).  Note that the code
-// generated is not as efficient as it would be for targets that natively
-// support the GC intrinsics, but it is useful for getting new targets
-// up-and-running quickly.
-// This pass implements the code transformation described in this paper:
-//   "Accurate Garbage Collection in an Uncooperative Environment"
-//   Fergus Henderson, ISMM, 2002
-#define DEBUG_TYPE "lowergc"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/ADT/SmallVector.h"
-using namespace llvm;
-namespace {
-  class VISIBILITY_HIDDEN LowerGC : public FunctionPass {
-    /// GCRootInt, GCReadInt, GCWriteInt - The function prototypes for the
-    /// llvm.gcread/llvm.gcwrite/llvm.gcroot intrinsics.
-    Function *GCRootInt, *GCReadInt, *GCWriteInt;
-    /// GCRead/GCWrite - These are the functions provided by the garbage
-    /// collector for read/write barriers.
-    Constant *GCRead, *GCWrite;
-    /// RootChain - This is the global linked-list that contains the chain of 
-    /// roots.
-    GlobalVariable *RootChain;
-    /// MainRootRecordType - This is the type for a function root entry if it
-    /// had zero roots.
-    const Type *MainRootRecordType;
-  public:
-    static char ID; // Pass identification, replacement for typeid
-    LowerGC() : FunctionPass((intptr_t)&ID), 
-                GCRootInt(0), GCReadInt(0), GCWriteInt(0),
-                GCRead(0), GCWrite(0), RootChain(0), MainRootRecordType(0) {}
-    virtual bool doInitialization(Module &M);
-    virtual bool runOnFunction(Function &F);
-  private:
-    const StructType *getRootRecordType(unsigned NumRoots);
-  };
-  char LowerGC::ID = 0;
-  RegisterPass<LowerGC>
-  X("lowergc", "Lower GC intrinsics, for GCless code generators");
-/// createLowerGCPass - This function returns an instance of the "lowergc"
-/// pass, which lowers garbage collection intrinsics to normal LLVM code.
-FunctionPass *llvm::createLowerGCPass() {
-  return new LowerGC();
-/// getRootRecordType - This function creates and returns the type for a root
-/// record containing 'NumRoots' roots.
-const StructType *LowerGC::getRootRecordType(unsigned NumRoots) {
-  // Build a struct that is a type used for meta-data/root pairs.
-  std::vector<const Type *> ST;
-  ST.push_back(GCRootInt->getFunctionType()->getParamType(0));
-  ST.push_back(GCRootInt->getFunctionType()->getParamType(1));
-  StructType *PairTy = StructType::get(ST);
-  // Build the array of pairs.
-  ArrayType *PairArrTy = ArrayType::get(PairTy, NumRoots);
-  // Now build the recursive list type.
-  PATypeHolder RootListH =
-    MainRootRecordType ? (Type*)MainRootRecordType : (Type*)OpaqueType::get();
-  ST.clear();
-  ST.push_back(PointerType::getUnqual(RootListH));         // Prev pointer
-  ST.push_back(Type::Int32Ty);                       // NumElements in array
-  ST.push_back(PairArrTy);                           // The pairs
-  StructType *RootList = StructType::get(ST);
-  if (MainRootRecordType)
-    return RootList;
-  assert(NumRoots == 0 && "The main struct type should have zero entries!");
-  cast<OpaqueType>((Type*)RootListH.get())->refineAbstractTypeTo(RootList);
-  MainRootRecordType = RootListH;
-  return cast<StructType>(RootListH.get());
-/// doInitialization - If this module uses the GC intrinsics, find them now.  If
-/// not, this pass does not do anything.
-bool LowerGC::doInitialization(Module &M) {
-  GCRootInt  = M.getFunction("llvm.gcroot");
-  GCReadInt  = M.getFunction("llvm.gcread");
-  GCWriteInt = M.getFunction("llvm.gcwrite");
-  if (!GCRootInt && !GCReadInt && !GCWriteInt) return false;
-  PointerType *VoidPtr = PointerType::getUnqual(Type::Int8Ty);
-  PointerType *VoidPtrPtr = PointerType::getUnqual(VoidPtr);
-  // If the program is using read/write barriers, find the implementations of
-  // them from the GC runtime library.
-  if (GCReadInt)        // Make:  sbyte* %llvm_gc_read(sbyte**)
-    GCRead = M.getOrInsertFunction("llvm_gc_read", VoidPtr, VoidPtr, 
-                                   (Type *)0);
-  if (GCWriteInt)       // Make:  void %llvm_gc_write(sbyte*, sbyte**)
-    GCWrite = M.getOrInsertFunction("llvm_gc_write", Type::VoidTy,
-                                    VoidPtr, VoidPtr, VoidPtrPtr, (Type *)0);
-  // If the program has GC roots, get or create the global root list.
-  if (GCRootInt) {
-    const StructType *RootListTy = getRootRecordType(0);
-    const Type *PRLTy = PointerType::getUnqual(RootListTy);
-    M.addTypeName("llvm_gc_root_ty", RootListTy);
-    // Get the root chain if it already exists.
-    RootChain = M.getGlobalVariable("llvm_gc_root_chain", PRLTy);
-    if (RootChain == 0) {
-      // If the root chain does not exist, insert a new one with linkonce
-      // linkage!
-      RootChain = new GlobalVariable(PRLTy, false,
-                                     GlobalValue::LinkOnceLinkage,
-                                     Constant::getNullValue(PRLTy),
-                                     "llvm_gc_root_chain", &M);
-    } else if (RootChain->hasExternalLinkage() && RootChain->isDeclaration()) {
-      RootChain->setInitializer(Constant::getNullValue(PRLTy));
-      RootChain->setLinkage(GlobalValue::LinkOnceLinkage);
-    }
-  }
-  return true;
-/// Coerce - If the specified operand number of the specified instruction does
-/// not have the specified type, insert a cast. Note that this only uses BitCast
-/// because the types involved are all pointers.
-static void Coerce(Instruction *I, unsigned OpNum, Type *Ty) {
-  if (I->getOperand(OpNum)->getType() != Ty) {
-    if (Constant *C = dyn_cast<Constant>(I->getOperand(OpNum)))
-      I->setOperand(OpNum, ConstantExpr::getBitCast(C, Ty));
-    else {
-      CastInst *CI = new BitCastInst(I->getOperand(OpNum), Ty, "", I);
-      I->setOperand(OpNum, CI);
-    }
-  }
-/// runOnFunction - If the program is using GC intrinsics, replace any
-/// read/write intrinsics with the appropriate read/write barrier calls, then
-/// inline them.  Finally, build the data structures for
-bool LowerGC::runOnFunction(Function &F) {
-  // Quick exit for programs that are not using GC mechanisms.
-  if (!GCRootInt && !GCReadInt && !GCWriteInt) return false;
-  PointerType *VoidPtr    = PointerType::getUnqual(Type::Int8Ty);
-  PointerType *VoidPtrPtr = PointerType::getUnqual(VoidPtr);
-  // If there are read/write barriers in the program, perform a quick pass over
-  // the function eliminating them.  While we are at it, remember where we see
-  // calls to llvm.gcroot.
-  std::vector<CallInst*> GCRoots;
-  std::vector<CallInst*> NormalCalls;
-  bool MadeChange = false;
-  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
-    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;)
-      if (CallInst *CI = dyn_cast<CallInst>(II++)) {
-        if (!CI->getCalledFunction() ||
-            !CI->getCalledFunction()->isIntrinsic())
-          NormalCalls.push_back(CI);   // Remember all normal function calls.
-        if (Function *F = CI->getCalledFunction())
-          if (F == GCRootInt)
-            GCRoots.push_back(CI);
-          else if (F == GCReadInt || F == GCWriteInt) {
-            if (F == GCWriteInt) {
-              // Change a llvm.gcwrite call to call llvm_gc_write instead.
-              CI->setOperand(0, GCWrite);
-              // Insert casts of the operands as needed.
-              Coerce(CI, 1, VoidPtr);
-              Coerce(CI, 2, VoidPtr);
-              Coerce(CI, 3, VoidPtrPtr);
-            } else {
-              Coerce(CI, 1, VoidPtr);
-              Coerce(CI, 2, VoidPtrPtr);
-              if (CI->getType() == VoidPtr) {
-                CI->setOperand(0, GCRead);
-              } else {
-                // Create a whole new call to replace the old one.
-                // It sure would be nice to pass op_begin()+1,
-                // op_begin()+2 but it runs into trouble with
-                // CallInst::init's &*iterator, which requires a
-                // conversion from Use* to Value*.  The conversion
-                // from Use to Value * is not useful because the
-                // memory for Value * won't be contiguous.
-                Value* Args[] = {
-                  CI->getOperand(1),
-                  CI->getOperand(2) 
-                };
-                CallInst *NC = new CallInst(GCRead, Args, Args + 2,
-                                            CI->getName(), CI);
-                // These functions only deal with ptr type results so BitCast
-                // is the correct kind of cast (no-op cast).
-                Value *NV = new BitCastInst(NC, CI->getType(), "", CI);
-                CI->replaceAllUsesWith(NV);
-                BB->getInstList().erase(CI);
-                CI = NC;
-              }
-            }
-            MadeChange = true;
-          }
-      }
-  // If there are no GC roots in this function, then there is no need to create
-  // a GC list record for it.
-  if (GCRoots.empty()) return MadeChange;
-  // Okay, there are GC roots in this function.  On entry to the function, add a
-  // record to the llvm_gc_root_chain, and remove it on exit.
-  // Create the alloca, and zero it out.
-  const StructType *RootListTy = getRootRecordType(GCRoots.size());
-  AllocaInst *AI = new AllocaInst(RootListTy, 0, "gcroots", 
-  // Insert the memset call after all of the allocas in the function.
-  BasicBlock::iterator IP = AI;
-  while (isa<AllocaInst>(IP)) ++IP;
-  Constant *Zero = ConstantInt::get(Type::Int32Ty, 0);
-  Constant *One  = ConstantInt::get(Type::Int32Ty, 1);
-  Value *Idx[2] = { Zero, Zero };
-  // Get a pointer to the prev pointer.
-  Value *PrevPtrPtr = new GetElementPtrInst(AI, Idx, Idx + 2,
-                                            "prevptrptr", IP);
-  // Load the previous pointer.
-  Value *PrevPtr = new LoadInst(RootChain, "prevptr", IP);
-  // Store the previous pointer into the prevptrptr
-  new StoreInst(PrevPtr, PrevPtrPtr, IP);
-  // Set the number of elements in this record.
-  Idx[1] = One;
-  Value *NumEltsPtr = new GetElementPtrInst(AI, Idx, Idx + 2,
-                                            "numeltsptr", IP);
-  new StoreInst(ConstantInt::get(Type::Int32Ty, GCRoots.size()), 
-  Value* Par[4];
-  Par[0] = Zero;
-  Par[1] = ConstantInt::get(Type::Int32Ty, 2);
-  const PointerType *PtrLocTy =
-    cast<PointerType>(GCRootInt->getFunctionType()->getParamType(0));
-  Constant *Null = ConstantPointerNull::get(PtrLocTy);
-  // Initialize all of the gcroot records now.
-  for (unsigned i = 0, e = GCRoots.size(); i != e; ++i) {
-    // Initialize the meta-data pointer.
-    Par[2] = ConstantInt::get(Type::Int32Ty, i);
-    Par[3] = One;
-    Value *MetaDataPtr = new GetElementPtrInst(AI, Par, Par + 4,
-                                               "MetaDataPtr", IP);
-    assert(isa<Constant>(GCRoots[i]->getOperand(2)) && "Must be a constant");
-    new StoreInst(GCRoots[i]->getOperand(2), MetaDataPtr, IP);
-    // Initialize the root pointer to null on entry to the function.
-    Par[3] = Zero;
-    Value *RootPtrPtr = new GetElementPtrInst(AI, Par, Par + 4,
-                                              "RootEntPtr", IP);
-    new StoreInst(Null, RootPtrPtr, IP);
-    // Each occurrance of the llvm.gcroot intrinsic now turns into an
-    // initialization of the slot with the address.
-    new StoreInst(GCRoots[i]->getOperand(1), RootPtrPtr, GCRoots[i]);
-  }
-  // Now that the record is all initialized, store the pointer into the global
-  // pointer.
-  Value *C = new BitCastInst(AI, PointerType::getUnqual(MainRootRecordType), "", IP);
-  new StoreInst(C, RootChain, IP);
-  // Eliminate all the gcroot records now.
-  for (unsigned i = 0, e = GCRoots.size(); i != e; ++i)
-    GCRoots[i]->getParent()->getInstList().erase(GCRoots[i]);
-  // On exit from the function we have to remove the entry from the GC root
-  // chain.  Doing this is straight-forward for return and unwind instructions:
-  // just insert the appropriate copy.
-  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
-    if (isa<UnwindInst>(BB->getTerminator()) ||
-        isa<ReturnInst>(BB->getTerminator())) {
-      // We could reuse the PrevPtr loaded on entry to the function, but this
-      // would make the value live for the whole function, which is probably a
-      // bad idea.  Just reload the value out of our stack entry.
-      PrevPtr = new LoadInst(PrevPtrPtr, "prevptr", BB->getTerminator());
-      new StoreInst(PrevPtr, RootChain, BB->getTerminator());
-    }
-  // If an exception is thrown from a callee we have to make sure to
-  // unconditionally take the record off the stack.  For this reason, we turn
-  // all call instructions into invoke whose cleanup pops the entry off the
-  // stack.  We only insert one cleanup block, which is shared by all invokes.
-  if (!NormalCalls.empty()) {
-    // Create the shared cleanup block.
-    BasicBlock *Cleanup = new BasicBlock("gc_cleanup", &F);
-    UnwindInst *UI = new UnwindInst(Cleanup);
-    PrevPtr = new LoadInst(PrevPtrPtr, "prevptr", UI);
-    new StoreInst(PrevPtr, RootChain, UI);
-    // Loop over all of the function calls, turning them into invokes.
-    while (!NormalCalls.empty()) {
-      CallInst *CI = NormalCalls.back();
-      BasicBlock *CBB = CI->getParent();
-      NormalCalls.pop_back();
-      // Split the basic block containing the function call.
-      BasicBlock *NewBB = CBB->splitBasicBlock(CI, CBB->getName()+".cont");
-      // Remove the unconditional branch inserted at the end of the CBB.
-      CBB->getInstList().pop_back();
-      NewBB->getInstList().remove(CI);
-      // Create a new invoke instruction.
-      std::vector<Value*> Args(CI->op_begin()+1, CI->op_end());
-      Value *II = new InvokeInst(CI->getCalledValue(), NewBB, Cleanup,
-                                 Args.begin(), Args.end(), CI->getName(), CBB);
-      cast<InvokeInst>(II)->setCallingConv(CI->getCallingConv());
-      cast<InvokeInst>(II)->setParamAttrs(CI->getParamAttrs());
-      CI->replaceAllUsesWith(II);
-      delete CI;
-    }
-  }
-  return true;

Modified: llvm/trunk/runtime/GC/SemiSpace/semispace.c

--- llvm/trunk/runtime/GC/SemiSpace/semispace.c (original)
+++ llvm/trunk/runtime/GC/SemiSpace/semispace.c Sun Jan  6 19:30:53 2008
@@ -97,24 +97,26 @@
  * FIXME: This should be in a code-generator specific library, but for now this
  * will work for all code generators.
-typedef struct GCRoot {
-  void **RootPtr;
-  void *Meta;
-} GCRoot;
+struct FrameMap {
+  int32_t NumRoots; // Number of roots in stack frame.
+  int32_t NumMeta;  // Number of metadata descriptors. May be < NumRoots.
+  void *Meta[];     // May be absent for roots without metadata.
-typedef struct GCRoots {
-  struct GCRoots *Next;
-  unsigned NumRoots;
-  GCRoot RootRecords[];
-} GCRoots;
-GCRoots *llvm_gc_root_chain;
+struct StackEntry {
+  ShadowStackEntry *Next; // Caller's stack entry.
+  const FrameMap *Map;    // Pointer to constant FrameMap.
+  void *Roots[];          // Stack roots (in-place array).
+StackEntry *llvm_gc_root_chain;
 void llvm_cg_walk_gcroots(void (*FP)(void **Root, void *Meta)) {
-  GCRoots *R = llvm_gc_root_chain;
-  for (; R; R = R->Next) {
+  for (StackEntry *R; R; R = R->Next) {
     unsigned i, e;
-    for (i = 0, e = R->NumRoots; i != e; ++i)
-      FP(R->RootRecords[i].RootPtr, R->RootRecords[i].Meta);
+    for (i = 0, e = R->NumMeta; i != e; ++i)
+      FP(&R->Roots[i], R->Map->Meta[i]);
+    for (e = R->NumRoots; i != e; ++i)
+      FP(&R->Roots[i], NULL);
 /* END FIXME! */

Added: llvm/trunk/test/CodeGen/Generic/GC/redundant_init.ll

--- llvm/trunk/test/CodeGen/Generic/GC/redundant_init.ll (added)
+++ llvm/trunk/test/CodeGen/Generic/GC/redundant_init.ll Sun Jan  6 19:30:53 2008
@@ -0,0 +1,17 @@
+; RUN: llvm-as < %s | llc -march=x86 | \
+; RUN:   ignore grep {movl..0} | count 0
+%struct.obj = type { i8*, %struct.obj* }
+declare void @g() gc "shadow-stack"
+define void @f(i8* %o) gc "shadow-stack" {
+       %root = alloca i8*
+       call void @llvm.gcroot(i8** %root, i8* null)
+       store i8* %o, i8** %root
+       call void @g()
+       ret void
+declare void @llvm.gcroot(i8**, i8*)

llvm-commits mailing list

Reply via email to