serge-sans-paille created this revision.
serge-sans-paille added reviewers: kees, nickdesaulniers.
serge-sans-paille requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Per the GCC info page:

  If the function is declared 'extern', then this definition of the
  function is used only for inlining.  In no case is the function
  compiled as a standalone function, not even if you take its address
  explicitly.  Such an address becomes an external reference, as if
  you had only declared the function, and had not defined it.

Respect that behavior for inline builtins: keep the original definition, and
generate a copy of the declaration suffixed by '.inline' that's only referenced
in direct calls.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D111009

Files:
  clang/lib/AST/Decl.cpp
  clang/lib/CodeGen/CGExpr.cpp
  clang/lib/CodeGen/CodeGenFunction.cpp
  clang/test/CodeGen/memcpy-inline-builtin.c
  clang/test/CodeGen/memcpy-nobuiltin.c
  clang/test/CodeGen/memcpy-nobuiltin.inc
  clang/test/CodeGen/pr9614.c

Index: clang/test/CodeGen/pr9614.c
===================================================================
--- clang/test/CodeGen/pr9614.c
+++ clang/test/CodeGen/pr9614.c
@@ -32,14 +32,14 @@
 
 // CHECK-LABEL: define{{.*}} void @f()
 // CHECK: call void @foo()
-// CHECK: call i32 @abs(i32 %0)
+// CHECK: call i32 @abs(i32 0)
 // CHECK: call i8* @strrchr(
 // CHECK: call void @llvm.prefetch.p0i8(
 // CHECK: call i8* @memchr(
 // CHECK: ret void
 
 // CHECK: declare void @foo()
+// CHECK: declare i32 @abs(i32
 // CHECK: declare i8* @strrchr(i8*, i32)
 // CHECK: declare i8* @memchr(
-// CHECK: declare i32 @abs(i32
 // CHECK: declare void @llvm.prefetch.p0i8(
Index: clang/test/CodeGen/memcpy-nobuiltin.inc
===================================================================
--- clang/test/CodeGen/memcpy-nobuiltin.inc
+++ clang/test/CodeGen/memcpy-nobuiltin.inc
@@ -2,7 +2,7 @@
 extern void *memcpy(void *dest, void const *from, size_t n);
 
 #ifdef WITH_DECL
-inline __attribute__((always_inline)) void *memcpy(void *dest, void const *from, size_t n) {
+inline __attribute__((always_inline)) __attribute__((gnu_inline)) void *memcpy(void *dest, void const *from, size_t n) {
   char const *ifrom = from;
   char *idest = dest;
   while (n--)
@@ -11,7 +11,7 @@
 }
 #endif
 #ifdef WITH_SELF_REFERENCE_DECL
-inline __attribute__((always_inline)) void *memcpy(void *dest, void const *from, size_t n) {
+inline __attribute__((always_inline)) __attribute__((gnu_inline)) void *memcpy(void *dest, void const *from, size_t n) {
   if (n != 0)
     memcpy(dest, from, n);
   return dest;
Index: clang/test/CodeGen/memcpy-nobuiltin.c
===================================================================
--- clang/test/CodeGen/memcpy-nobuiltin.c
+++ clang/test/CodeGen/memcpy-nobuiltin.c
@@ -5,7 +5,7 @@
 // CHECK-WITH-DECL-NOT: @llvm.memcpy
 // CHECK-NO-DECL: @llvm.memcpy
 // CHECK-SELF-REF-DECL-LABEL: define {{.*}}i8* @memcpy.inline
-// CHECK-SELF-REF-DECL:       @memcpy(
+// CHECK-SELF-REF-DECL:       @llvm.memcpy.{{.*}}(
 //
 #include <memcpy-nobuiltin.inc>
 void test(void *dest, void const *from, size_t n) {
Index: clang/test/CodeGen/memcpy-inline-builtin.c
===================================================================
--- clang/test/CodeGen/memcpy-inline-builtin.c
+++ clang/test/CodeGen/memcpy-inline-builtin.c
@@ -32,13 +32,39 @@
 // CHECK-NEXT:    store i8* [[TMP0]], i8** [[A_ADDR_I]], align 8
 // CHECK-NEXT:    store i8* [[TMP1]], i8** [[B_ADDR_I]], align 8
 // CHECK-NEXT:    store i64 [[TMP2]], i64* [[C_ADDR_I]], align 8
-// CHECK-NEXT:    call void asm sideeffect "# memcpy.inline marker", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR2:[0-9]+]], !srcloc !2
+// CHECK-NEXT:    call void asm sideeffect "# memcpy.inline marker", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR4:[0-9]+]], !srcloc !2
 // CHECK-NEXT:    [[TMP3:%.*]] = load i8*, i8** [[A_ADDR_I]], align 8
 // CHECK-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[B_ADDR_I]], align 8
 // CHECK-NEXT:    [[TMP5:%.*]] = load i64, i64* [[C_ADDR_I]], align 8
-// CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[TMP3]], i8* align 1 [[TMP4]], i64 [[TMP5]], i1 false) #[[ATTR2]]
+// CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[TMP3]], i8* align 1 [[TMP4]], i64 [[TMP5]], i1 false) #[[ATTR4]]
+// CHECK-NEXT:    ret i8* [[TMP3]]
+//
+void *foo(void *a, const void *b, size_t c) {
+  return memcpy(a, b, c);
+}
+
+// CHECK-LABEL: @bar(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[CPY:%.*]] = alloca i8* (i8*, i8*, i64)*, align 8
+// CHECK-NEXT:    store i8* [[A:%.*]], i8** [[A_ADDR]], align 8
+// CHECK-NEXT:    store i8* [[B:%.*]], i8** [[B_ADDR]], align 8
+// CHECK-NEXT:    store i64 [[C:%.*]], i64* [[C_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* [[C_ADDR]], align 8
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[TMP0]], 10
+// CHECK-NEXT:    [[TMP1:%.*]] = zext i1 [[CMP]] to i64
+// CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i8* (i8*, i8*, i64)* @memcpy, i8* (i8*, i8*, i64)* @foo
+// CHECK-NEXT:    store i8* (i8*, i8*, i64)* [[COND]], i8* (i8*, i8*, i64)** [[CPY]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load i8* (i8*, i8*, i64)*, i8* (i8*, i8*, i64)** [[CPY]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = load i8*, i8** [[A_ADDR]], align 8
+// CHECK-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP5:%.*]] = load i64, i64* [[C_ADDR]], align 8
+// CHECK-NEXT:    [[CALL:%.*]] = call i8* [[TMP2]](i8* [[TMP3]], i8* [[TMP4]], i64 [[TMP5]])
 // CHECK-NEXT:    ret void
 //
-void foo(void *a, const void *b, size_t c) {
-  memcpy(a, b, c);
+void bar(void *a, const void *b, size_t c) {
+  void *(*cpy)(void *, const void *, size_t) = c > 10 ? memcpy : foo;
+  cpy(a, b, c);
 }
Index: clang/lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.cpp
+++ clang/lib/CodeGen/CodeGenFunction.cpp
@@ -45,6 +45,7 @@
 #include "llvm/Support/CRC.h"
 #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
+
 using namespace clang;
 using namespace CodeGen;
 
@@ -1294,10 +1295,20 @@
   FunctionArgList Args;
   QualType ResTy = BuildFunctionArgList(GD, Args);
 
-  // Give a different name to inline builtin to avoid conflict with actual
-  // builtins.
-  if (FD->isInlineBuiltinDeclaration() && Fn)
-    Fn->setName(Fn->getName() + ".inline");
+  // When generating code for a builtin with an inline declaration, use a
+  // mangled name to hold the actual body, while keeping an external definition
+  // in case the function pointer is referenced somewhere.
+  if (FD->isInlineBuiltinDeclaration() && Fn) {
+    llvm::Module &M = CGM.getModule();
+    llvm::Function *Clone = M.getFunction((Fn->getName() + ".inline").str());
+    if (!Clone) {
+      Clone = llvm::Function::Create(Fn->getFunctionType(), Fn->getLinkage(),
+                                     Fn->getAddressSpace(),
+                                     (Fn->getName() + ".inline").str(), &M);
+      Clone->addFnAttr(llvm::Attribute::AlwaysInline);
+    }
+    Fn = Clone;
+  }
 
   // Check if we should generate debug info for this function.
   if (FD->hasAttr<NoDebugAttr>()) {
Index: clang/lib/CodeGen/CGExpr.cpp
===================================================================
--- clang/lib/CodeGen/CGExpr.cpp
+++ clang/lib/CodeGen/CGExpr.cpp
@@ -4891,13 +4891,30 @@
   const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
 
   if (auto builtinID = FD->getBuiltinID()) {
-    // Replaceable builtin provide their own implementation of a builtin. Unless
-    // we are in the builtin implementation itself, don't call the actual
-    // builtin. If we are in the builtin implementation, avoid trivial infinite
+
+    // Replaceable builtins provide their own implementation of a builtin. If we
+    // are in an inline builtin implementation, avoid trivial infinite
     // recursion.
+    StringRef FDInlineName = (FD->getName() + ".inline").str();
     if (!FD->isInlineBuiltinDeclaration() ||
-        CGF.CurFn->getName() == FD->getName())
+        CGF.CurFn->getName() == FDInlineName) {
       return CGCallee::forBuiltin(builtinID, FD);
+    }
+    // When directly calling an inline builtin, call it through its mangled
+    // name to make it clear it's not the actual builtin.
+    else {
+      llvm::Constant *CalleePtr = EmitFunctionDeclPointer(CGF.CGM, GD);
+      llvm::Function *Fn = llvm::cast<llvm::Function>(CalleePtr);
+      llvm::Module &M = CGF.CGM.getModule();
+      llvm::Function *Clone = M.getFunction(FDInlineName);
+      if (!Clone) {
+        Clone = llvm::Function::Create(Fn->getFunctionType(), Fn->getLinkage(),
+                                       Fn->getAddressSpace(),
+                                       FD->getName() + ".inline", &M);
+        Clone->addFnAttr(llvm::Attribute::AlwaysInline);
+      }
+      return CGCallee::forDirect(Clone, GD);
+    }
   }
 
   llvm::Constant *CalleePtr = EmitFunctionDeclPointer(CGF.CGM, GD);
@@ -4905,6 +4922,7 @@
       FD->hasAttr<CUDAGlobalAttr>())
     CalleePtr = CGF.CGM.getCUDARuntime().getKernelStub(
         cast<llvm::GlobalValue>(CalleePtr->stripPointerCasts()));
+
   return CGCallee::forDirect(CalleePtr, GD);
 }
 
Index: clang/lib/AST/Decl.cpp
===================================================================
--- clang/lib/AST/Decl.cpp
+++ clang/lib/AST/Decl.cpp
@@ -3177,7 +3177,8 @@
 
   const FunctionDecl *Definition;
   return hasBody(Definition) && Definition->isInlineSpecified() &&
-         Definition->hasAttr<AlwaysInlineAttr>();
+         Definition->hasAttr<AlwaysInlineAttr>() &&
+         Definition->hasAttr<GNUInlineAttr>();
 }
 
 bool FunctionDecl::isDestroyingOperatorDelete() const {
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to