ahatanak created this revision.

We found that the IR that IRGen emits when expanding __builtin_os_log_format
is quite big and has a lot of redundancy.

For example, when clang compiles the following code:

  void foo1(void *buf) {
    __builtin_os_log_format(buf, "%d %d", 11, 22);
  }

The IR looks like this:

  define void @foo1(i8* %buf) #0 {
  entry:
    %buf.addr = alloca i8*, align 8
    store i8* %buf, i8** %buf.addr, align 8
    %0 = load i8*, i8** %buf.addr, align 8
    %summary = getelementptr i8, i8* %0, i64 0
    store i8 0, i8* %summary, align 1
    %numArgs = getelementptr i8, i8* %0, i64 1
    store i8 2, i8* %numArgs, align 1
    %argDescriptor = getelementptr i8, i8* %0, i64 2
    store i8 0, i8* %argDescriptor, align 1
    %argSize = getelementptr i8, i8* %0, i64 3
    store i8 4, i8* %argSize, align 1
    %1 = getelementptr i8, i8* %0, i64 4
    %2 = bitcast i8* %1 to i32*
    store i32 11, i32* %2, align 1
    %argDescriptor1 = getelementptr i8, i8* %0, i64 8
    store i8 0, i8* %argDescriptor1, align 1
    %argSize2 = getelementptr i8, i8* %0, i64 9
    store i8 4, i8* %argSize2, align 1
    %3 = getelementptr i8, i8* %0, i64 10
    %4 = bitcast i8* %3 to i32*
    store i32 22, i32* %4, align 1
    ret void
  }

The IR generated when compiling a similar call like 
"__builtin_os_log_format(buf, "%d %d", 33, 44)" is almost the same except for 
the values of the integer constants stored, so there is an opportunity for code 
reduction here.

To reduce code size, this patch modifies IRGen to emit a helper function that 
can be used by different call sites that call __builtin_os_log_format in a 
program. When compiling with -Oz, the generated helper function is marked as 
linkonce_odr, hidden, and noinline so that the linker can merge identical 
helper functions from different translation units. When compiling with other 
optimization levels, the function is marked as 'internal' and the generated IR 
should look mostly the same after inlining.

This patch also fixes a bug where the generated IR writes past the buffer when 
%m is the last directive. For example, the size of 'buf' in the following code 
is 4 but IRGen emits a store that writes a 4-byte value at buf+4.

  char buf[__builtin_os_log_format_buffer_size("%m")];
  __builtin_os_log_format(buf, "%m");

Original patch was written by Duncan.

rdar://problem/34065973
rdar://problem/34196543


https://reviews.llvm.org/D38606

Files:
  lib/CodeGen/CGBuiltin.cpp
  lib/CodeGen/CodeGenFunction.h
  test/CodeGen/builtins.c
  test/CodeGenObjC/os_log.m

Index: test/CodeGenObjC/os_log.m
===================================================================
--- test/CodeGenObjC/os_log.m
+++ test/CodeGenObjC/os_log.m
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 %s -emit-llvm -o - -triple x86_64-darwin-apple -fobjc-arc -O2 | FileCheck %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple x86_64-darwin-apple -fobjc-arc -O0 | FileCheck %s -check-prefix=CHECK-O0
 
 // Make sure we emit clang.arc.use before calling objc_release as part of the
 // cleanup. This way we make sure the object will not be released until the
@@ -12,28 +13,67 @@
 // Behavior of __builtin_os_log differs between platforms, so only test on X86
 #ifdef __x86_64__
 // CHECK-LABEL: define i8* @test_builtin_os_log
+// CHECK-O0-LABEL: define i8* @test_builtin_os_log
+// CHECK: (i8* returned %[[BUF:.*]])
+// CHECK-O0: (i8* %[[BUF:.*]])
 void *test_builtin_os_log(void *buf) {
   return __builtin_os_log_format(buf, "capabilities: %@", GenString());
 
-  // CHECK: store i8 2, i8*
-  // CHECK: [[NUM_ARGS:%.*]] = getelementptr i8, i8* {{.*}}, i64 1
-  // CHECK: store i8 1, i8* [[NUM_ARGS]]
-  //
-  // CHECK: [[ARG1_DESC:%.*]] = getelementptr i8, i8* {{.*}}, i64 2
-  // CHECK: store i8 64, i8* [[ARG1_DESC]]
-  // CHECK: [[ARG1_SIZE:%.*]] = getelementptr i8, i8* {{.*}}, i64 3
-  // CHECK: store i8 8, i8* [[ARG1_SIZE]]
-  // CHECK: [[ARG1:%.*]] = getelementptr i8, i8* {{.*}}, i64 4
-  // CHECK: [[ARG1_CAST:%.*]] = bitcast i8* [[ARG1]] to
-
-  // CHECK: [[STRING:%.*]] = {{.*}} call {{.*}} @GenString()
-  // CHECK: [[STRING_CAST:%.*]] = bitcast {{.*}} [[STRING]] to
-  // CHECK: call {{.*}} @objc_retainAutoreleasedReturnValue(i8* [[STRING_CAST]])
-  // CHECK: store {{.*}} [[STRING]], {{.*}} [[ARG1_CAST]]
-
-  // CHECK: call void (...) @clang.arc.use({{.*}} [[STRING]])
-  // CHECK: call void @objc_release(i8* [[STRING_CAST]])
-  // CHECK: ret i8*
+  // CHECK: %[[CALL:.*]] = tail call %[[V0:.*]]* (...) @GenString()
+  // CHECK: %[[V0]] = bitcast %[[V0]]* %[[CALL]] to i8*
+  // CHECK: %[[V1:.*]] = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %[[V0]])
+  // CHECK: %[[V2:.*]] = ptrtoint %[[V0]]* %[[CALL]] to i64
+  // CHECK: store i8 2, i8* %[[BUF]], align 1
+  // CHECK: %[[NUMARGS_I:.*]] = getelementptr i8, i8* %[[BUF]], i64 1
+  // CHECK: store i8 1, i8* %[[NUMARGS_I]], align 1
+  // CHECK: %[[ARGDESCRIPTOR_I:.*]] = getelementptr i8, i8* %[[BUF]], i64 2
+  // CHECK: store i8 64, i8* %[[ARGDESCRIPTOR_I]], align 1
+  // CHECK: %[[ARGSIZE_I:.*]] = getelementptr i8, i8* %[[BUF]], i64 3
+  // CHECK: store i8 8, i8* %[[ARGSIZE_I]], align 1
+  // CHECK: %[[ARGDATA_I:.*]] = getelementptr i8, i8* %[[BUF]], i64 4
+  // CHECK: %[[ARGDATACAST_I:.*]] = bitcast i8* %[[ARGDATA_I]] to i64*
+  // CHECK: store i64 %[[V2]], i64* %[[ARGDATACAST_I]], align 1
+  // CHECK: tail call void (...) @clang.arc.use(%[[V0]]* %[[CALL]])
+  // CHECK: tail call void @objc_release(i8* %[[V0]])
+  // CHECK: ret i8* %[[BUF]]
+
+  // clang.arc.use is used and removed in IR optimizations. At O0, we should not
+  // emit clang.arc.use, since it will not be removed and we will have a link
+  // error.
+  // CHECK-O0: %[[BUF_ADDR:.*]] = alloca i8*, align 8
+  // CHECK-O0: store i8* %[[BUF]], i8** %[[BUF_ADDR]], align 8
+  // CHECK-O0: %[[V0:.*]] = load i8*, i8** %[[BUF_ADDR]], align 8
+  // CHECK-O0: %[[CALL:.*]] = call %[[V0]]* (...) @GenString()
+  // CHECK-O0: %[[V1:.*]] = bitcast %[[V0]]* %[[CALL]] to i8*
+  // CHECK-O0: %[[V2:.*]] = call i8* @objc_retainAutoreleasedReturnValue(i8* %[[V1]])
+  // CHECK-O0: %[[V3:.*]] = bitcast i8* %[[V2]] to %[[V0]]*
+  // CHECK-O0: %[[V4:.*]] = ptrtoint %[[V0]]* %[[V3]] to i64
+  // CHECK-O0: call void @__os_log_helper_1_2_1_8_64(i8* %[[V0]], i64 %[[V4]])
+  // CHECK-O0: %[[V5:.*]] = bitcast %[[V0]]* %[[V3]] to i8*
+  // CHECK-O0-NOT: call void (...) @clang.arc.use({{.*}}
+  // CHECK-O0: call void @objc_release(i8* %[[V5]])
+  // CHECK-O0: ret i8* %[[V0]]
 }
 
+// CHECK-O0-LABEL: define internal void @__os_log_helper_1_2_1_8_64
+// CHECK-O0: (i8* %[[BUFFER:.*]], i64 %[[ARG0:.*]])
+
+// CHECK-O0: %[[BUFFER_ADDR:.*]] = alloca i8*, align 8
+// CHECK-O0: %[[ARG0_ADDR:.*]] = alloca i64, align 8
+// CHECK-O0: store i8* %[[BUFFER]], i8** %[[BUFFER_ADDR]], align 8
+// CHECK-O0: store i64 %[[ARG0]], i64* %[[ARG0_ADDR]], align 8
+// CHECK-O0: %[[BUF:.*]] = load i8*, i8** %[[BUFFER_ADDR]], align 8
+// CHECK-O0: %[[SUMMARY:.*]] = getelementptr i8, i8* %[[BUF]], i64 0
+// CHECK-O0: store i8 2, i8* %[[SUMMARY]], align 1
+// CHECK-O0: %[[NUMARGS:.*]] = getelementptr i8, i8* %[[BUF]], i64 1
+// CHECK-O0: store i8 1, i8* %[[NUMARGS]], align 1
+// CHECK-O0: %[[ARGDESCRIPTOR:.*]] = getelementptr i8, i8* %[[BUF]], i64 2
+// CHECK-O0: store i8 64, i8* %[[ARGDESCRIPTOR]], align 1
+// CHECK-O0: %[[ARGSIZE:.*]] = getelementptr i8, i8* %[[BUF]], i64 3
+// CHECK-O0: store i8 8, i8* %[[ARGSIZE]], align 1
+// CHECK-O0: %[[ARGDATA:.*]] = getelementptr i8, i8* %[[BUF]], i64 4
+// CHECK-O0: %[[ARGDATACAST:.*]] = bitcast i8* %[[ARGDATA]] to i64*
+// CHECK-O0: %[[V0:.*]] = load i64, i64* %[[ARG0_ADDR]], align 8
+// CHECK-O0: store i64 %[[V0]], i64* %[[ARGDATACAST]], align 1
+
 #endif
Index: test/CodeGen/builtins.c
===================================================================
--- test/CodeGen/builtins.c
+++ test/CodeGen/builtins.c
@@ -1,6 +1,7 @@
 // RUN: %clang_cc1 -emit-llvm -o %t %s
 // RUN: not grep __builtin %t
-// RUN: %clang_cc1 %s -emit-llvm -o - -triple x86_64-darwin-apple | FileCheck %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple x86_64-darwin-apple -Oz -disable-llvm-passes | FileCheck -check-prefixes=CHECK,CHECKOZ %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple x86_64-darwin-apple -Os -disable-llvm-passes | FileCheck -check-prefixes=CHECK,CHECKOS %s
 
 int printf(const char *, ...);
 
@@ -378,229 +379,393 @@
 #ifdef __x86_64__
 
 // CHECK-LABEL: define void @test_builtin_os_log
-// CHECK: (i8* [[BUF:%.*]], i32 [[I:%.*]], i8* [[DATA:%.*]])
+// CHECK: (i8* %[[BUF:.*]], i32 %[[I:.*]], i8* %[[DATA:.*]])
 void test_builtin_os_log(void *buf, int i, const char *data) {
   volatile int len;
-  // CHECK: store i8* [[BUF]], i8** [[BUF_ADDR:%.*]], align 8
-  // CHECK: store i32 [[I]], i32* [[I_ADDR:%.*]], align 4
-  // CHECK: store i8* [[DATA]], i8** [[DATA_ADDR:%.*]], align 8
-
-  // CHECK: store volatile i32 34
+  // CHECK: %[[BUF_ADDR:.*]] = alloca i8*, align 8
+  // CHECK: %[[I_ADDR:.*]] = alloca i32, align 4
+  // CHECK: %[[DATA_ADDR:.*]] = alloca i8*, align 8
+  // CHECK: %[[LEN:.*]] = alloca i32, align 4
+  // CHECK: store i8* %[[BUF]], i8** %[[BUF_ADDR]], align 8
+  // CHECK: store i32 %[[I]], i32* %[[I_ADDR]], align 4
+  // CHECK: store i8* %[[DATA]], i8** %[[DATA_ADDR]], align 8
+
+  // CHECK: store volatile i32 34, i32* %[[LEN]]
   len = __builtin_os_log_format_buffer_size("%d %{public}s %{private}.16P", i, data, data);
 
-  // CHECK: [[BUF2:%.*]] = load i8*, i8** [[BUF_ADDR]]
-  // CHECK: [[SUMMARY:%.*]] = getelementptr i8, i8* [[BUF2]], i64 0
-  // CHECK: store i8 3, i8* [[SUMMARY]]
-  // CHECK: [[NUM_ARGS:%.*]] = getelementptr i8, i8* [[BUF2]], i64 1
-  // CHECK: store i8 4, i8* [[NUM_ARGS]]
-  //
-  // CHECK: [[ARG1_DESC:%.*]] = getelementptr i8, i8* [[BUF2]], i64 2
-  // CHECK: store i8 0, i8* [[ARG1_DESC]]
-  // CHECK: [[ARG1_SIZE:%.*]] = getelementptr i8, i8* [[BUF2]], i64 3
-  // CHECK: store i8 4, i8* [[ARG1_SIZE]]
-  // CHECK: [[ARG1:%.*]] = getelementptr i8, i8* [[BUF2]], i64 4
-  // CHECK: [[ARG1_INT:%.*]] = bitcast i8* [[ARG1]] to i32*
-  // CHECK: [[I2:%.*]] = load i32, i32* [[I_ADDR]]
-  // CHECK: store i32 [[I2]], i32* [[ARG1_INT]]
-
-  // CHECK: [[ARG2_DESC:%.*]] = getelementptr i8, i8* [[BUF2]], i64 8
-  // CHECK: store i8 34, i8* [[ARG2_DESC]]
-  // CHECK: [[ARG2_SIZE:%.*]] = getelementptr i8, i8* [[BUF2]], i64 9
-  // CHECK: store i8 8, i8* [[ARG2_SIZE]]
-  // CHECK: [[ARG2:%.*]] = getelementptr i8, i8* [[BUF2]], i64 10
-  // CHECK: [[ARG2_PTR:%.*]] = bitcast i8* [[ARG2]] to i8**
-  // CHECK: [[DATA2:%.*]] = load i8*, i8** [[DATA_ADDR]]
-  // CHECK: store i8* [[DATA2]], i8** [[ARG2_PTR]]
-
-  // CHECK: [[ARG3_DESC:%.*]] = getelementptr i8, i8* [[BUF2]], i64 18
-  // CHECK: store i8 17, i8* [[ARG3_DESC]]
-  // CHECK: [[ARG3_SIZE:%.*]] = getelementptr i8, i8* [[BUF2]], i64 19
-  // CHECK: store i8 4, i8* [[ARG3_SIZE]]
-  // CHECK: [[ARG3:%.*]] = getelementptr i8, i8* [[BUF2]], i64 20
-  // CHECK: [[ARG3_INT:%.*]] = bitcast i8* [[ARG3]] to i32*
-  // CHECK: store i32 16, i32* [[ARG3_INT]]
-
-  // CHECK: [[ARG4_DESC:%.*]] = getelementptr i8, i8* [[BUF2]], i64 24
-  // CHECK: store i8 49, i8* [[ARG4_DESC]]
-  // CHECK: [[ARG4_SIZE:%.*]] = getelementptr i8, i8* [[BUF2]], i64 25
-  // CHECK: store i8 8, i8* [[ARG4_SIZE]]
-  // CHECK: [[ARG4:%.*]] = getelementptr i8, i8* [[BUF2]], i64 26
-  // CHECK: [[ARG4_PTR:%.*]] = bitcast i8* [[ARG4]] to i8**
-  // CHECK: [[DATA3:%.*]] = load i8*, i8** [[DATA_ADDR]]
-  // CHECK: store i8* [[DATA3]], i8** [[ARG4_PTR]]
-
+  // CHECK: %[[V1:.*]] = load i8*, i8** %[[BUF_ADDR]]
+  // CHECK: %[[V2:.*]] = load i32, i32* %[[I_ADDR]]
+  // CHECK: %[[V3:.*]] = load i8*, i8** %[[DATA_ADDR]]
+  // CHECK: %[[V4:.*]] = ptrtoint i8* %[[V3]] to i64
+  // CHECK: %[[V5:.*]] = load i8*, i8** %[[DATA_ADDR]]
+  // CHECK: %[[V6:.*]] = ptrtoint i8* %[[V5]] to i64
+  // CHECK: call void @__os_log_helper_1_3_4_4_0_8_34_4_17_8_49(i8* %[[V1]], i32 %[[V2]], i64 %[[V4]], i32 16, i64 %[[V6]])
   __builtin_os_log_format(buf, "%d %{public}s %{private}.16P", i, data, data);
 }
 
-// CHECK-LABEL: define void @test_builtin_os_log_errno
-// CHECK: (i8* [[BUF:%.*]], i8* [[DATA:%.*]])
-void test_builtin_os_log_errno(void *buf, const char *data) {
-  volatile int len;
-  // CHECK: store i8* [[BUF]], i8** [[BUF_ADDR:%.*]], align 8
-  // CHECK: store i8* [[DATA]], i8** [[DATA_ADDR:%.*]], align 8
-
-  // CHECK: store volatile i32 2
-  len = __builtin_os_log_format_buffer_size("%S");
-
-  // CHECK: [[BUF2:%.*]] = load i8*, i8** [[BUF_ADDR]]
-  // CHECK: [[SUMMARY:%.*]] = getelementptr i8, i8* [[BUF2]], i64 0
-  // CHECK: store i8 2, i8* [[SUMMARY]]
-  // CHECK: [[NUM_ARGS:%.*]] = getelementptr i8, i8* [[BUF2]], i64 1
-  // CHECK: store i8 1, i8* [[NUM_ARGS]]
-
-  // CHECK: [[ARG1_DESC:%.*]] = getelementptr i8, i8* [[BUF2]], i64 2
-  // CHECK: store i8 96, i8* [[ARG1_DESC]]
-  // CHECK: [[ARG1_SIZE:%.*]] = getelementptr i8, i8* [[BUF2]], i64 3
-  // CHECK: store i8 0, i8* [[ARG1_SIZE]]
-  // CHECK: [[ARG1:%.*]] = getelementptr i8, i8* [[BUF2]], i64 4
-  // CHECK: [[ARG1_INT:%.*]] = bitcast i8* [[ARG1]] to i32*
-  // CHECK: store i32 0, i32* [[ARG1_INT]]
-
-  __builtin_os_log_format(buf, "%m");
-}
+// CHECKOZ-LABEL: define linkonce_odr hidden void @__os_log_helper_1_3_4_4_0_8_34_4_17_8_49
+// CHECKOS-LABEL: define internal void @__os_log_helper_1_3_4_4_0_8_34_4_17_8_49
+// CHECK: (i8* %[[BUFFER:.*]], i32 %[[ARG0:.*]], i64 %[[ARG1:.*]], i32 %[[ARG2:.*]], i64 %[[ARG3:.*]])
+
+// CHECK: %[[BUFFER_ADDR:.*]] = alloca i8*, align 8
+// CHECK: %[[ARG0_ADDR:.*]] = alloca i32, align 4
+// CHECK: %[[ARG1_ADDR:.*]] = alloca i64, align 8
+// CHECK: %[[ARG2_ADDR:.*]] = alloca i32, align 4
+// CHECK: %[[ARG3_ADDR:.*]] = alloca i64, align 8
+// CHECK: store i8* %[[BUFFER]], i8** %[[BUFFER_ADDR]], align 8
+// CHECK: store i32 %[[ARG0]], i32* %[[ARG0_ADDR]], align 4
+// CHECK: store i64 %[[ARG1]], i64* %[[ARG1_ADDR]], align 8
+// CHECK: store i32 %[[ARG2]], i32* %[[ARG2_ADDR]], align 4
+// CHECK: store i64 %[[ARG3]], i64* %[[ARG3_ADDR]], align 8
+// CHECK: %[[BUF:.*]] = load i8*, i8** %[[BUFFER_ADDR]], align 8
+// CHECK: %[[SUMMARY:.*]] = getelementptr i8, i8* %[[BUF]], i64 0
+// CHECK: store i8 3, i8* %[[SUMMARY]], align 1
+// CHECK: %[[NUMARGS:.*]] = getelementptr i8, i8* %[[BUF]], i64 1
+// CHECK: store i8 4, i8* %[[NUMARGS]], align 1
+// CHECK: %[[ARGDESCRIPTOR:.*]] = getelementptr i8, i8* %[[BUF]], i64 2
+// CHECK: store i8 0, i8* %[[ARGDESCRIPTOR]], align 1
+// CHECK: %[[ARGSIZE:.*]] = getelementptr i8, i8* %[[BUF]], i64 3
+// CHECK: store i8 4, i8* %[[ARGSIZE]], align 1
+// CHECK: %[[ARGDATA:.*]] = getelementptr i8, i8* %[[BUF]], i64 4
+// CHECK: %[[ARGDATACAST:.*]] = bitcast i8* %[[ARGDATA]] to i32*
+// CHECK: %[[V0:.*]] = load i32, i32* %[[ARG0_ADDR]], align 4
+// CHECK: store i32 %[[V0]], i32* %[[ARGDATACAST]], align 1
+// CHECK: %[[ARGDESCRIPTOR1:.*]] = getelementptr i8, i8* %[[BUF]], i64 8
+// CHECK: store i8 34, i8* %[[ARGDESCRIPTOR1]], align 1
+// CHECK: %[[ARGSIZE2:.*]] = getelementptr i8, i8* %[[BUF]], i64 9
+// CHECK: store i8 8, i8* %[[ARGSIZE2]], align 1
+// CHECK: %[[ARGDATA3:.*]] = getelementptr i8, i8* %[[BUF]], i64 10
+// CHECK: %[[ARGDATACAST4:.*]] = bitcast i8* %[[ARGDATA3]] to i64*
+// CHECK: %[[V1:.*]] = load i64, i64* %[[ARG1_ADDR]], align 8
+// CHECK: store i64 %[[V1]], i64* %[[ARGDATACAST4]], align 1
+// CHECK: %[[ARGDESCRIPTOR5:.*]] = getelementptr i8, i8* %[[BUF]], i64 18
+// CHECK: store i8 17, i8* %[[ARGDESCRIPTOR5]], align 1
+// CHECK: %[[ARGSIZE6:.*]] = getelementptr i8, i8* %[[BUF]], i64 19
+// CHECK: store i8 4, i8* %[[ARGSIZE6]], align 1
+// CHECK: %[[ARGDATA7:.*]] = getelementptr i8, i8* %[[BUF]], i64 20
+// CHECK: %[[ARGDATACAST8:.*]] = bitcast i8* %[[ARGDATA7]] to i32*
+// CHECK: %[[V2:.*]] = load i32, i32* %[[ARG2_ADDR]], align 4
+// CHECK: store i32 %[[V2]], i32* %[[ARGDATACAST8]], align 1
+// CHECK: %[[ARGDESCRIPTOR9:.*]] = getelementptr i8, i8* %[[BUF]], i64 24
+// CHECK: store i8 49, i8* %[[ARGDESCRIPTOR9]], align 1
+// CHECK: %[[ARGSIZE10:.*]] = getelementptr i8, i8* %[[BUF]], i64 25
+// CHECK: store i8 8, i8* %[[ARGSIZE10]], align 1
+// CHECK: %[[ARGDATA11:.*]] = getelementptr i8, i8* %[[BUF]], i64 26
+// CHECK: %[[ARGDATACAST12:.*]] = bitcast i8* %[[ARGDATA11]] to i64*
+// CHECK: %[[V3:.*]] = load i64, i64* %[[ARG3_ADDR]], align 8
+// CHECK: store i64 %[[V3]], i64* %[[ARGDATACAST12]], align 1
 
 // CHECK-LABEL: define void @test_builtin_os_log_wide
-// CHECK: (i8* [[BUF:%.*]], i8* [[DATA:%.*]], i32* [[STR:%.*]])
+// CHECK: (i8* %[[BUF:.*]], i8* %[[DATA:.*]], i32* %[[STR:.*]])
 typedef int wchar_t;
 void test_builtin_os_log_wide(void *buf, const char *data, wchar_t *str) {
   volatile int len;
-  // CHECK: store i8* [[BUF]], i8** [[BUF_ADDR:%.*]], align 8
-  // CHECK: store i8* [[DATA]], i8** [[DATA_ADDR:%.*]], align 8
-  // CHECK: store i32* [[STR]], i32** [[STR_ADDR:%.*]],
 
-  // CHECK: store volatile i32 12
+  // CHECK: %[[BUF_ADDR:.*]] = alloca i8*, align 8
+  // CHECK: %[[DATA_ADDR:.*]] = alloca i8*, align 8
+  // CHECK: %[[STR_ADDR:.*]] = alloca i32*, align 8
+  // CHECK: %[[LEN:.*]] = alloca i32, align 4
+  // CHECK: store i8* %[[BUF]], i8** %[[BUF_ADDR]], align 8
+  // CHECK: store i8* %[[DATA]], i8** %[[DATA_ADDR]], align 8
+  // CHECK: store i32* %[[STR]], i32** %[[STR_ADDR]], align 8
+
+  // CHECK: store volatile i32 12, i32* %[[LEN]], align 4
   len = __builtin_os_log_format_buffer_size("%S", str);
 
-  // CHECK: [[BUF2:%.*]] = load i8*, i8** [[BUF_ADDR]]
-  // CHECK: [[SUMMARY:%.*]] = getelementptr i8, i8* [[BUF2]], i64 0
-  // CHECK: store i8 2, i8* [[SUMMARY]]
-  // CHECK: [[NUM_ARGS:%.*]] = getelementptr i8, i8* [[BUF2]], i64 1
-  // CHECK: store i8 1, i8* [[NUM_ARGS]]
-
-  // CHECK: [[ARG1_DESC:%.*]] = getelementptr i8, i8* [[BUF2]], i64 2
-  // CHECK: store i8 80, i8* [[ARG1_DESC]]
-  // CHECK: [[ARG1_SIZE:%.*]] = getelementptr i8, i8* [[BUF2]], i64 3
-  // CHECK: store i8 8, i8* [[ARG1_SIZE]]
-  // CHECK: [[ARG1:%.*]] = getelementptr i8, i8* [[BUF2]], i64 4
-  // CHECK: [[ARG1_PTR:%.*]] = bitcast i8* [[ARG1]] to i32**
-  // CHECK: [[STR2:%.*]] = load i32*, i32** [[STR_ADDR]]
-  // CHECK: store i32* [[STR2]], i32** [[ARG1_PTR]]
+  // CHECK: %[[V1:.*]] = load i8*, i8** %[[BUF_ADDR]], align 8
+  // CHECK: %[[V2:.*]] = load i32*, i32** %[[STR_ADDR]], align 8
+  // CHECK: %[[V3:.*]] = ptrtoint i32* %[[V2]] to i64
+  // CHECK: call void @__os_log_helper_1_2_1_8_80(i8* %[[V1]], i64 %[[V3]])
 
   __builtin_os_log_format(buf, "%S", str);
 }
 
+// CHECKOZ-LABEL: define linkonce_odr hidden void @__os_log_helper_1_2_1_8_80
+// CHECKOS-LABEL: define internal void @__os_log_helper_1_2_1_8_80
+// CHECK: (i8* %[[BUFFER:.*]], i64 %[[ARG0:.*]])
+
+// CHECK: %[[BUFFER_ADDR:.*]] = alloca i8*, align 8
+// CHECK: %[[ARG0_ADDR:.*]] = alloca i64, align 8
+// CHECK: store i8* %[[BUFFER]], i8** %[[BUFFER_ADDR]], align 8
+// CHECK: store i64 %[[ARG0]], i64* %[[ARG0_ADDR]], align 8
+// CHECK: %[[BUF:.*]] = load i8*, i8** %[[BUFFER_ADDR]], align 8
+// CHECK: %[[SUMMARY:.*]] = getelementptr i8, i8* %[[BUF]], i64 0
+// CHECK: store i8 2, i8* %[[SUMMARY]], align 1
+// CHECK: %[[NUMARGS:.*]] = getelementptr i8, i8* %[[BUF]], i64 1
+// CHECK: store i8 1, i8* %[[NUMARGS]], align 1
+// CHECK: %[[ARGDESCRIPTOR:.*]] = getelementptr i8, i8* %[[BUF]], i64 2
+// CHECK: store i8 80, i8* %[[ARGDESCRIPTOR]], align 1
+// CHECK: %[[ARGSIZE:.*]] = getelementptr i8, i8* %[[BUF]], i64 3
+// CHECK: store i8 8, i8* %[[ARGSIZE]], align 1
+// CHECK: %[[ARGDATA:.*]] = getelementptr i8, i8* %[[BUF]], i64 4
+// CHECK: %[[ARGDATACAST:.*]] = bitcast i8* %[[ARGDATA]] to i64*
+// CHECK: %[[V0:.*]] = load i64, i64* %[[ARG0_ADDR]], align 8
+// CHECK: store i64 %[[V0]], i64* %[[ARGDATACAST]], align 1
+
 // CHECK-LABEL: define void @test_builtin_os_log_precision_width
-// CHECK: (i8* [[BUF:%.*]], i8* [[DATA:%.*]], i32 [[PRECISION:%.*]], i32 [[WIDTH:%.*]])
+// CHECK: (i8* %[[BUF:.*]], i8* %[[DATA:.*]], i32 %[[PRECISION:.*]], i32 %[[WIDTH:.*]])
 void test_builtin_os_log_precision_width(void *buf, const char *data,
                                          int precision, int width) {
   volatile int len;
-  // CHECK: store i8* [[BUF]], i8** [[BUF_ADDR:%.*]], align 8
-  // CHECK: store i8* [[DATA]], i8** [[DATA_ADDR:%.*]], align 8
-  // CHECK: store i32 [[PRECISION]], i32* [[PRECISION_ADDR:%.*]], align 4
-  // CHECK: store i32 [[WIDTH]], i32* [[WIDTH_ADDR:%.*]], align 4
-
-  // CHECK: store volatile i32 24,
+  // CHECK: %[[BUF_ADDR:.*]] = alloca i8*, align 8
+  // CHECK: %[[DATA_ADDR:.*]] = alloca i8*, align 8
+  // CHECK: %[[PRECISION_ADDR:.*]] = alloca i32, align 4
+  // CHECK: %[[WIDTH_ADDR:.*]] = alloca i32, align 4
+  // CHECK: %[[LEN:.*]] = alloca i32, align 4
+  // CHECK: store i8* %[[BUF]], i8** %[[BUF_ADDR]], align 8
+  // CHECK: store i8* %[[DATA]], i8** %[[DATA_ADDR]], align 8
+  // CHECK: store i32 %[[PRECISION]], i32* %[[PRECISION_ADDR]], align 4
+  // CHECK: store i32 %[[WIDTH]], i32* %[[WIDTH_ADDR]], align 4
+
+  // CHECK: store volatile i32 24, i32* %[[LEN]], align 4
   len = __builtin_os_log_format_buffer_size("Hello %*.*s World", precision, width, data);
 
-  // CHECK: [[BUF2:%.*]] = load i8*, i8** [[BUF_ADDR]]
-  // CHECK: [[SUMMARY:%.*]] = getelementptr i8, i8* [[BUF2]], i64 0
-  // CHECK: store i8 2, i8* [[SUMMARY]]
-  // CHECK: [[NUM_ARGS:%.*]] = getelementptr i8, i8* [[BUF2]], i64 1
-  // CHECK: store i8 3, i8* [[NUM_ARGS]]
-
-  // CHECK: [[ARG1_DESC:%.*]] = getelementptr i8, i8* [[BUF2]], i64 2
-  // CHECK: store i8 0, i8* [[ARG1_DESC]]
-  // CHECK: [[ARG1_SIZE:%.*]] = getelementptr i8, i8* [[BUF2]], i64 3
-  // CHECK: store i8 4, i8* [[ARG1_SIZE]]
-  // CHECK: [[ARG1:%.*]] = getelementptr i8, i8* [[BUF2]], i64 4
-  // CHECK: [[ARG1_INT:%.*]] = bitcast i8* [[ARG1]] to i32*
-  // CHECK: [[ARG1_VAL:%.*]] = load i32, i32* [[PRECISION_ADDR]]
-  // CHECK: store i32 [[ARG1_VAL]], i32* [[ARG1_INT]]
-
-  // CHECK: [[ARG2_DESC:%.*]] = getelementptr i8, i8* [[BUF2]], i64 8
-  // CHECK: store i8 16, i8* [[ARG2_DESC]]
-  // CHECK: [[ARG2_SIZE:%.*]] = getelementptr i8, i8* [[BUF2]], i64 9
-  // CHECK: store i8 4, i8* [[ARG2_SIZE]]
-  // CHECK: [[ARG2:%.*]] = getelementptr i8, i8* [[BUF2]], i64 10
-  // CHECK: [[ARG2_INT:%.*]] = bitcast i8* [[ARG2]] to i32*
-  // CHECK: [[ARG2_VAL:%.*]] = load i32, i32* [[WIDTH_ADDR]]
-  // CHECK: store i32 [[ARG2_VAL]], i32* [[ARG2_INT]]
-
-  // CHECK: [[ARG3_DESC:%.*]] = getelementptr i8, i8* [[BUF2]], i64 14
-  // CHECK: store i8 32, i8* [[ARG3_DESC]]
-  // CHECK: [[ARG3_SIZE:%.*]] = getelementptr i8, i8* [[BUF2]], i64 15
-  // CHECK: store i8 8, i8* [[ARG3_SIZE]]
-  // CHECK: [[ARG3:%.*]] = getelementptr i8, i8* [[BUF2]], i64 16
-  // CHECK: [[ARG3_PTR:%.*]] = bitcast i8* [[ARG3]] to i8**
-  // CHECK: [[DATA2:%.*]] = load i8*, i8** [[DATA_ADDR]]
-  // CHECK: store i8* [[DATA2]], i8** [[ARG3_PTR]]
-
+  // CHECK: %[[V1:.*]] = load i8*, i8** %[[BUF_ADDR]], align 8
+  // CHECK: %[[V2:.*]] = load i32, i32* %[[PRECISION_ADDR]], align 4
+  // CHECK: %[[V3:.*]] = load i32, i32* %[[WIDTH_ADDR]], align 4
+  // CHECK: %[[V4:.*]] = load i8*, i8** %[[DATA_ADDR]], align 8
+  // CHECK: %[[V5:.*]] = ptrtoint i8* %[[V4]] to i64
+  // CHECK: call void @__os_log_helper_1_2_3_4_0_4_16_8_32(i8* %[[V1]], i32 %[[V2]], i32 %[[V3]], i64 %[[V5]])
   __builtin_os_log_format(buf, "Hello %*.*s World", precision, width, data);
 }
 
+// CHECKOZ-LABEL: define linkonce_odr hidden void @__os_log_helper_1_2_3_4_0_4_16_8_32
+// CHECKOS-LABEL: define internal void @__os_log_helper_1_2_3_4_0_4_16_8_32
+// CHECK: (i8* %[[BUFFER:.*]], i32 %[[ARG0:.*]], i32 %[[ARG1:.*]], i64 %[[ARG2:.*]])
+
+// CHECK: %[[BUFFER_ADDR:.*]] = alloca i8*, align 8
+// CHECK: %[[ARG0_ADDR:.*]] = alloca i32, align 4
+// CHECK: %[[ARG1_ADDR:.*]] = alloca i32, align 4
+// CHECK: %[[ARG2_ADDR:.*]] = alloca i64, align 8
+// CHECK: store i8* %[[BUFFER]], i8** %[[BUFFER_ADDR]], align 8
+// CHECK: store i32 %[[ARG0]], i32* %[[ARG0_ADDR]], align 4
+// CHECK: store i32 %[[ARG1]], i32* %[[ARG1_ADDR]], align 4
+// CHECK: store i64 %[[ARG2]], i64* %[[ARG2_ADDR]], align 8
+// CHECK: %[[BUF:.*]] = load i8*, i8** %[[BUFFER_ADDR]], align 8
+// CHECK: %[[SUMMARY:.*]] = getelementptr i8, i8* %[[BUF]], i64 0
+// CHECK: store i8 2, i8* %[[SUMMARY]], align 1
+// CHECK: %[[NUMARGS:.*]] = getelementptr i8, i8* %[[BUF]], i64 1
+// CHECK: store i8 3, i8* %[[NUMARGS]], align 1
+// CHECK: %[[ARGDESCRIPTOR:.*]] = getelementptr i8, i8* %[[BUF]], i64 2
+// CHECK: store i8 0, i8* %[[ARGDESCRIPTOR]], align 1
+// CHECK: %[[ARGSIZE:.*]] = getelementptr i8, i8* %[[BUF]], i64 3
+// CHECK: store i8 4, i8* %[[ARGSIZE]], align 1
+// CHECK: %[[ARGDATA:.*]] = getelementptr i8, i8* %[[BUF]], i64 4
+// CHECK: %[[ARGDATACAST:.*]] = bitcast i8* %[[ARGDATA]] to i32*
+// CHECK: %[[V0:.*]] = load i32, i32* %[[ARG0_ADDR]], align 4
+// CHECK: store i32 %[[V0]], i32* %[[ARGDATACAST]], align 1
+// CHECK: %[[ARGDESCRIPTOR1:.*]] = getelementptr i8, i8* %[[BUF]], i64 8
+// CHECK: store i8 16, i8* %[[ARGDESCRIPTOR1]], align 1
+// CHECK: %[[ARGSIZE2:.*]] = getelementptr i8, i8* %[[BUF]], i64 9
+// CHECK: store i8 4, i8* %[[ARGSIZE2]], align 1
+// CHECK: %[[ARGDATA3:.*]] = getelementptr i8, i8* %[[BUF]], i64 10
+// CHECK: %[[ARGDATACAST4:.*]] = bitcast i8* %[[ARGDATA3]] to i32*
+// CHECK: %[[V1:.*]] = load i32, i32* %[[ARG1_ADDR]], align 4
+// CHECK: store i32 %[[V1]], i32* %[[ARGDATACAST4]], align 1
+// CHECK: %[[ARGDESCRIPTOR5:.*]] = getelementptr i8, i8* %[[BUF]], i64 14
+// CHECK: store i8 32, i8* %[[ARGDESCRIPTOR5]], align 1
+// CHECK: %[[ARGSIZE6:.*]] = getelementptr i8, i8* %[[BUF]], i64 15
+// CHECK: store i8 8, i8* %[[ARGSIZE6]], align 1
+// CHECK: %[[ARGDATA7:.*]] = getelementptr i8, i8* %[[BUF]], i64 16
+// CHECK: %[[ARGDATACAST8:.*]] = bitcast i8* %[[ARGDATA7]] to i64*
+// CHECK: %[[V2:.*]] = load i64, i64* %[[ARG2_ADDR]], align 8
+// CHECK: store i64 %[[V2]], i64* %[[ARGDATACAST8]], align 1
+
 // CHECK-LABEL: define void @test_builtin_os_log_invalid
-// CHECK: (i8* [[BUF:%.*]], i32 [[DATA:%.*]])
+// CHECK: (i8* %[[BUF:.*]], i32 %[[DATA:.*]])
 void test_builtin_os_log_invalid(void *buf, int data) {
   volatile int len;
-  // CHECK: store i8* [[BUF]], i8** [[BUF_ADDR:%.*]], align 8
-  // CHECK: store i32 [[DATA]], i32* [[DATA_ADDR:%.*]]
+  // CHECK: %[[BUF_ADDR:.*]] = alloca i8*, align 8
+  // CHECK: %[[DATA_ADDR:.*]] = alloca i32, align 4
+  // CHECK: %[[LEN:.*]] = alloca i32, align 4
+  // CHECK: store i8* %[[BUF]], i8** %[[BUF_ADDR]], align 8
+  // CHECK: store i32 %[[DATA]], i32* %[[DATA_ADDR]], align 4
 
-  // CHECK: store volatile i32 8,
+  // CHECK: store volatile i32 8, i32* %[[LEN]], align 4
   len = __builtin_os_log_format_buffer_size("invalid specifier %: %d even a trailing one%", data);
 
-  // CHECK: [[BUF2:%.*]] = load i8*, i8** [[BUF_ADDR]]
-  // CHECK: [[SUMMARY:%.*]] = getelementptr i8, i8* [[BUF2]], i64 0
-  // CHECK: store i8 0, i8* [[SUMMARY]]
-  // CHECK: [[NUM_ARGS:%.*]] = getelementptr i8, i8* [[BUF2]], i64 1
-  // CHECK: store i8 1, i8* [[NUM_ARGS]]
-
-  // CHECK: [[ARG1_DESC:%.*]] = getelementptr i8, i8* [[BUF2]], i64 2
-  // CHECK: store i8 0, i8* [[ARG1_DESC]]
-  // CHECK: [[ARG1_SIZE:%.*]] = getelementptr i8, i8* [[BUF2]], i64 3
-  // CHECK: store i8 4, i8* [[ARG1_SIZE]]
-  // CHECK: [[ARG1:%.*]] = getelementptr i8, i8* [[BUF2]], i64 4
-  // CHECK: [[ARG1_INT:%.*]] = bitcast i8* [[ARG1]] to i32*
-  // CHECK: [[ARG1_VAL:%.*]] = load i32, i32* [[DATA_ADDR]]
-  // CHECK: store i32 [[ARG1_VAL]], i32* [[ARG1_INT]]
+  // CHECK: %[[V1:.*]] = load i8*, i8** %[[BUF_ADDR]], align 8
+  // CHECK: %[[V2:.*]] = load i32, i32* %[[DATA_ADDR]], align 4
+  // CHECK: call void @__os_log_helper_1_0_1_4_0(i8* %[[V1]], i32 %[[V2]])
 
   __builtin_os_log_format(buf, "invalid specifier %: %d even a trailing one%", data);
 }
 
+// CHECKOZ-LABEL: define linkonce_odr hidden void @__os_log_helper_1_0_1_4_0
+// CHECKOS-LABEL: define internal void @__os_log_helper_1_0_1_4_0
+// CHECK: (i8* %[[BUFFER:.*]], i32 %[[ARG0:.*]])
+
+// CHECK: %[[BUFFER_ADDR:.*]] = alloca i8*, align 8
+// CHECK: %[[ARG0_ADDR:.*]] = alloca i32, align 4
+// CHECK: store i8* %[[BUFFER]], i8** %[[BUFFER_ADDR]], align 8
+// CHECK: store i32 %[[ARG0]], i32* %[[ARG0_ADDR]], align 4
+// CHECK: %[[BUF:.*]] = load i8*, i8** %[[BUFFER_ADDR]], align 8
+// CHECK: %[[SUMMARY:.*]] = getelementptr i8, i8* %[[BUF]], i64 0
+// CHECK: store i8 0, i8* %[[SUMMARY]], align 1
+// CHECK: %[[NUMARGS:.*]] = getelementptr i8, i8* %[[BUF]], i64 1
+// CHECK: store i8 1, i8* %[[NUMARGS]], align 1
+// CHECK: %[[ARGDESCRIPTOR:.*]] = getelementptr i8, i8* %[[BUF]], i64 2
+// CHECK: store i8 0, i8* %[[ARGDESCRIPTOR]], align 1
+// CHECK: %[[ARGSIZE:.*]] = getelementptr i8, i8* %[[BUF]], i64 3
+// CHECK: store i8 4, i8* %[[ARGSIZE]], align 1
+// CHECK: %[[ARGDATA:.*]] = getelementptr i8, i8* %[[BUF]], i64 4
+// CHECK: %[[ARGDATACAST:.*]] = bitcast i8* %[[ARGDATA]] to i32*
+// CHECK: %[[V0:.*]] = load i32, i32* %[[ARG0_ADDR]], align 4
+// CHECK: store i32 %[[V0]], i32* %[[ARGDATACAST]], align 1
+
 // CHECK-LABEL: define void @test_builtin_os_log_percent
-// CHECK: (i8* [[BUF:%.*]], i8* [[DATA1:%.*]], i8* [[DATA2:%.*]])
+// CHECK: (i8* %[[BUF:.*]], i8* %[[DATA1:.*]], i8* %[[DATA2:.*]])
 // Check that the %% which does not consume any argument is correctly handled
 void test_builtin_os_log_percent(void *buf, const char *data1, const char *data2) {
   volatile int len;
-  // CHECK: store i8* [[BUF]], i8** [[BUF_ADDR:%.*]], align 8
-  // CHECK: store i8* [[DATA1]], i8** [[DATA1_ADDR:%.*]], align 8
-  // CHECK: store i8* [[DATA2]], i8** [[DATA2_ADDR:%.*]], align 8
-  // CHECK: store volatile i32 22
+  // CHECK: %[[BUF_ADDR:.*]] = alloca i8*, align 8
+  // CHECK: %[[DATA1_ADDR:.*]] = alloca i8*, align 8
+  // CHECK: %[[DATA2_ADDR:.*]] = alloca i8*, align 8
+  // CHECK: %[[LEN:.*]] = alloca i32, align 4
+  // CHECK: store i8* %[[BUF]], i8** %[[BUF_ADDR]], align 8
+  // CHECK: store i8* %[[DATA1]], i8** %[[DATA1_ADDR]], align 8
+  // CHECK: store i8* %[[DATA2]], i8** %[[DATA2_ADDR]], align 8
+  // CHECK: store volatile i32 22, i32* %[[LEN]], align 4
+
   len = __builtin_os_log_format_buffer_size("%s %% %s", data1, data2);
 
-  // CHECK: [[BUF2:%.*]] = load i8*, i8** [[BUF_ADDR]]
-  // CHECK: [[SUMMARY:%.*]] = getelementptr i8, i8* [[BUF2]], i64 0
-  // CHECK: store i8 2, i8* [[SUMMARY]]
-  // CHECK: [[NUM_ARGS:%.*]] = getelementptr i8, i8* [[BUF2]], i64 1
-  // CHECK: store i8 2, i8* [[NUM_ARGS]]
-  //
-  // CHECK: [[ARG1_DESC:%.*]] = getelementptr i8, i8* [[BUF2]], i64 2
-  // CHECK: store i8 32, i8* [[ARG1_DESC]]
-  // CHECK: [[ARG1_SIZE:%.*]] = getelementptr i8, i8* [[BUF2]], i64 3
-  // CHECK: store i8 8, i8* [[ARG1_SIZE]]
-  // CHECK: [[ARG1:%.*]] = getelementptr i8, i8* [[BUF2]], i64 4
-  // CHECK: [[ARG1_PTR:%.*]] = bitcast i8* [[ARG1]] to i8**
-  // CHECK: [[DATA1:%.*]] = load i8*, i8** [[DATA1_ADDR]]
-  // CHECK: store i8* [[DATA1]], i8** [[ARG1_PTR]]
-  //
-  // CHECK: [[ARG2_DESC:%.*]] = getelementptr i8, i8* [[BUF2]], i64 12
-  // CHECK: store i8 32, i8* [[ARG2_DESC]]
-  // CHECK: [[ARG2_SIZE:%.*]] = getelementptr i8, i8* [[BUF2]], i64 13
-  // CHECK: store i8 8, i8* [[ARG2_SIZE]]
-  // CHECK: [[ARG2:%.*]] = getelementptr i8, i8* [[BUF2]], i64 14
-  // CHECK: [[ARG2_PTR:%.*]] = bitcast i8* [[ARG2]] to i8**
-  // CHECK: [[DATA2:%.*]] = load i8*, i8** [[DATA2_ADDR]]
-  // CHECK: store i8* [[DATA2]], i8** [[ARG2_PTR]]
+  // CHECK: %[[V1:.*]] = load i8*, i8** %[[BUF_ADDR]], align 8
+  // CHECK: %[[V2:.*]] = load i8*, i8** %[[DATA1_ADDR]], align 8
+  // CHECK: %[[V3:.*]] = ptrtoint i8* %[[V2]] to i64
+  // CHECK: %[[V4:.*]] = load i8*, i8** %[[DATA2_ADDR]], align 8
+  // CHECK: %[[V5:.*]] = ptrtoint i8* %[[V4]] to i64
+  // CHECK: call void @__os_log_helper_1_2_2_8_32_8_32(i8* %[[V1]], i64 %[[V3]], i64 %[[V5]])
+
   __builtin_os_log_format(buf, "%s %% %s", data1, data2);
 }
 
-#endif
\ No newline at end of file
+// CHECKOZ-LABEL: define linkonce_odr hidden void @__os_log_helper_1_2_2_8_32_8_32
+// CHECKOS-LABEL: define internal void @__os_log_helper_1_2_2_8_32_8_32
+// CHECK: (i8* %[[BUFFER:.*]], i64 %[[ARG0:.*]], i64 %[[ARG1:.*]])
+
+// CHECK: %[[BUFFER_ADDR:.*]] = alloca i8*, align 8
+// CHECK: %[[ARG0_ADDR:.*]] = alloca i64, align 8
+// CHECK: %[[ARG1_ADDR:.*]] = alloca i64, align 8
+// CHECK: store i8* %[[BUFFER]], i8** %[[BUFFER_ADDR]], align 8
+// CHECK: store i64 %[[ARG0]], i64* %[[ARG0_ADDR]], align 8
+// CHECK: store i64 %[[ARG1]], i64* %[[ARG1_ADDR]], align 8
+// CHECK: %[[BUF:.*]] = load i8*, i8** %[[BUFFER_ADDR]], align 8
+// CHECK: %[[SUMMARY:.*]] = getelementptr i8, i8* %[[BUF]], i64 0
+// CHECK: store i8 2, i8* %[[SUMMARY]], align 1
+// CHECK: %[[NUMARGS:.*]] = getelementptr i8, i8* %[[BUF]], i64 1
+// CHECK: store i8 2, i8* %[[NUMARGS]], align 1
+// CHECK: %[[ARGDESCRIPTOR:.*]] = getelementptr i8, i8* %[[BUF]], i64 2
+// CHECK: store i8 32, i8* %[[ARGDESCRIPTOR]], align 1
+// CHECK: %[[ARGSIZE:.*]] = getelementptr i8, i8* %[[BUF]], i64 3
+// CHECK: store i8 8, i8* %[[ARGSIZE]], align 1
+// CHECK: %[[ARGDATA:.*]] = getelementptr i8, i8* %[[BUF]], i64 4
+// CHECK: %[[ARGDATACAST:.*]] = bitcast i8* %[[ARGDATA]] to i64*
+// CHECK: %[[V0:.*]] = load i64, i64* %[[ARG0_ADDR]], align 8
+// CHECK: store i64 %[[V0]], i64* %[[ARGDATACAST]], align 1
+// CHECK: %[[ARGDESCRIPTOR1:.*]] = getelementptr i8, i8* %[[BUF]], i64 12
+// CHECK: store i8 32, i8* %[[ARGDESCRIPTOR1]], align 1
+// CHECK: %[[ARGSIZE2:.*]] = getelementptr i8, i8* %[[BUF]], i64 13
+// CHECK: store i8 8, i8* %[[ARGSIZE2]], align 1
+// CHECK: %[[ARGDATA3:.*]] = getelementptr i8, i8* %[[BUF]], i64 14
+// CHECK: %[[ARGDATACAST4:.*]] = bitcast i8* %[[ARGDATA3]] to i64*
+// CHECK: %[[V1:.*]] = load i64, i64* %[[ARG1_ADDR]], align 8
+// CHECK: store i64 %[[V1]], i64* %[[ARGDATACAST4]], align 1
+
+// Check that the following two functions call the same helper function.
+
+// CHECK-LABEL: define void @test_builtin_os_log_merge_helper0
+// CHECK: call void @__os_log_helper_1_0_2_4_0_8_0(
+void test_builtin_os_log_merge_helper0(void *buf, int i, double d) {
+  __builtin_os_log_format(buf, "%d %f", i, d);
+}
+
+// CHECKOZ-LABEL: define linkonce_odr hidden void @__os_log_helper_1_0_2_4_0_8_0(
+// CHECKOS-LABEL: define internal void @__os_log_helper_1_0_2_4_0_8_0(
+
+// CHECK-LABEL: define void @test_builtin_os_log_merge_helper1
+// CHECK: call void @__os_log_helper_1_0_2_4_0_8_0(
+void test_builtin_os_log_merge_helper1(void *buf, unsigned u, long long ll) {
+  __builtin_os_log_format(buf, "%u %lld", u, ll);
+}
+
+// Check that this function doesn't write past the end of array 'buf'.
+
+// CHECK-LABEL: define void @test_builtin_os_log_errno
+void test_builtin_os_log_errno() {
+  // CHECK: %[[VLA:.*]] = alloca i8, i64 4, align 16
+  // CHECK: call void @__os_log_helper_16_2_1_0_96(i8* %[[VLA]])
+
+  char buf[__builtin_os_log_format_buffer_size("%m")];
+  __builtin_os_log_format(buf, "%m");
+}
+
+// CHECKOZ-LABEL: define linkonce_odr hidden void @__os_log_helper_16_2_1_0_96
+// CHECKOS-LABEL: define internal void @__os_log_helper_16_2_1_0_96
+// CHECK: (i8* %[[BUFFER:.*]])
+
+// CHECK: %[[BUFFER_ADDR:.*]] = alloca i8*, align 8
+// CHECK: store i8* %[[BUFFER]], i8** %[[BUFFER_ADDR]], align 8
+// CHECK: %[[BUF:.*]] = load i8*, i8** %[[BUFFER_ADDR]], align 8
+// CHECK: %[[SUMMARY:.*]] = getelementptr i8, i8* %[[BUF]], i64 0
+// CHECK: store i8 2, i8* %[[SUMMARY]], align 16
+// CHECK: %[[NUMARGS:.*]] = getelementptr i8, i8* %[[BUF]], i64 1
+// CHECK: store i8 1, i8* %[[NUMARGS]], align 1
+// CHECK: %[[ARGDESCRIPTOR:.*]] = getelementptr i8, i8* %[[BUF]], i64 2
+// CHECK: store i8 96, i8* %[[ARGDESCRIPTOR]], align 2
+// CHECK: %[[ARGSIZE:.*]] = getelementptr i8, i8* %[[BUF]], i64 3
+// CHECK: store i8 0, i8* %[[ARGSIZE]], align 1
+// CHECK-NEXT: ret void
+
+// CHECK-LABEL: define void @test_builtin_os_log_long_double
+// CHECK: (i8* %[[BUF:.*]], x86_fp80 %[[LD:.*]])
+void test_builtin_os_log_long_double(void *buf, long double ld) {
+  // CHECK: %[[BUF_ADDR:.*]] = alloca i8*, align 8
+  // CHECK: %[[LD_ADDR:.*]] = alloca x86_fp80, align 16
+  // CHECK: %[[COERCE:.*]] = alloca i128, align 16
+  // CHECK: store i8* %[[BUF]], i8** %[[BUF_ADDR]], align 8
+  // CHECK: store x86_fp80 %[[LD]], x86_fp80* %[[LD_ADDR]], align 16
+  // CHECK: %[[V0:.*]] = load i8*, i8** %[[BUF_ADDR]], align 8
+  // CHECK: %[[V1:.*]] = load x86_fp80, x86_fp80* %[[LD_ADDR]], align 16
+  // CHECK: %[[V2:.*]] = bitcast x86_fp80 %[[V1]] to i80
+  // CHECK: %[[V3:.*]] = zext i80 %[[V2]] to i128
+  // CHECK: store i128 %[[V3]], i128* %[[COERCE]], align 16
+  // CHECK: %[[V4:.*]] = bitcast i128* %[[COERCE]] to { i64, i64 }*
+  // CHECK: %[[V5:.*]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* %[[V4]], i32 0, i32 0
+  // CHECK: %[[V6:.*]] = load i64, i64* %[[V5]], align 16
+  // CHECK: %[[V7:.*]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* %[[V4]], i32 0, i32 1
+  // CHECK: %[[V8:.*]] = load i64, i64* %[[V7]], align 8
+  // CHECK: call void @__os_log_helper_1_0_1_16_0(i8* %[[V0]], i64 %[[V6]], i64 %[[V8]])
+
+  __builtin_os_log_format(buf, "%Lf", ld);
+}
+
+// CHECKOZ-LABEL: define linkonce_odr hidden void @__os_log_helper_1_0_1_16_0
+// CHECKOS-LABEL: define internal void @__os_log_helper_1_0_1_16_0
+// CHECK: (i8* %[[BUFFER:.*]], i64 %[[ARG0_COERCE0:.*]], i64 %[[ARG0_COERCE1:.*]])
+
+// CHECK: %[[ARG0:.*]] = alloca i128, align 16
+// CHECK: %[[BUFFER_ADDR:.*]] = alloca i8*, align 8
+// CHECK: %[[ARG0_ADDR:.*]] = alloca i128, align 16
+// CHECK: %[[V0:.*]] = bitcast i128* %[[ARG0]] to { i64, i64 }*
+// CHECK: %[[V1:.*]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* %[[V0]], i32 0, i32 0
+// CHECK: store i64 %[[ARG0_COERCE0]], i64* %[[V1]], align 16
+// CHECK: %[[V2:.*]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* %[[V0]], i32 0, i32 1
+// CHECK: store i64 %[[ARG0_COERCE1]], i64* %[[V2]], align 8
+// CHECK: %[[ARG01:.*]] = load i128, i128* %[[ARG0]], align 16
+// CHECK: store i8* %[[BUFFER]], i8** %[[BUFFER_ADDR]], align 8
+// CHECK: store i128 %[[ARG01]], i128* %[[ARG0_ADDR]], align 16
+// CHECK: %[[BUF:.*]] = load i8*, i8** %[[BUFFER_ADDR]], align 8
+// CHECK: %[[SUMMARY:.*]] = getelementptr i8, i8* %[[BUF]], i64 0
+// CHECK: store i8 0, i8* %[[SUMMARY]], align 1
+// CHECK: %[[NUMARGS:.*]] = getelementptr i8, i8* %[[BUF]], i64 1
+// CHECK: store i8 1, i8* %[[NUMARGS]], align 1
+// CHECK: %[[ARGDESCRIPTOR:.*]] = getelementptr i8, i8* %[[BUF]], i64 2
+// CHECK: store i8 0, i8* %[[ARGDESCRIPTOR]], align 1
+// CHECK: %[[ARGSIZE:.*]] = getelementptr i8, i8* %[[BUF]], i64 3
+// CHECK: store i8 16, i8* %[[ARGSIZE]], align 1
+// CHECK: %[[ARGDATA:.*]] = getelementptr i8, i8* %[[BUF]], i64 4
+// CHECK: %[[ARGDATACAST:.*]] = bitcast i8* %[[ARGDATA]] to i128*
+// CHECK: %[[V3:.*]] = load i128, i128* %[[ARG0_ADDR]], align 16
+// CHECK: store i128 %[[V3]], i128* %[[ARGDATACAST]], align 1
+
+#endif
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -76,6 +76,10 @@
 class ObjCAtSynchronizedStmt;
 class ObjCAutoreleasePoolStmt;
 
+namespace analyze_os_log {
+class OSLogBufferLayout;
+}
+
 namespace CodeGen {
 class CodeGenTypes;
 class CGCallee;
@@ -3301,6 +3305,13 @@
                          unsigned BuiltinID, const CallExpr *E,
                          ReturnValueSlot ReturnValue);
 
+  /// Emit IR for __builtin_os_log_format.
+  RValue emitBuiltinOSLogFormat(const CallExpr &E);
+
+  llvm::Function *generateBuiltinOSLogHelperFunction(
+      const analyze_os_log::OSLogBufferLayout &Layout,
+      CharUnits BufferAlignment);
+
   RValue EmitBlockCallExpr(const CallExpr *E, ReturnValueSlot ReturnValue);
 
   /// EmitTargetBuiltinExpr - Emit the given builtin call. Returns 0 if the call
Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -663,6 +663,183 @@
   return ArgValue;
 }
 
+/// Get the argument type for arguments to os_log_helper.
+static CanQualType getOSLogArgType(ASTContext &C, int Size) {
+  QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
+  return C.getCanonicalType(UnsignedTy);
+}
+
+llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
+    const analyze_os_log::OSLogBufferLayout &Layout,
+    CharUnits BufferAlignment) {
+  ASTContext &Ctx = getContext();
+
+  llvm::SmallString<64> Name;
+  {
+    raw_svector_ostream OS(Name);
+    OS << "__os_log_helper";
+    OS << "_" << BufferAlignment.getQuantity();
+    OS << "_" << int(Layout.getSummaryByte());
+    OS << "_" << int(Layout.getNumArgsByte());
+    for (const auto &Item : Layout.Items)
+      OS << "_" << int(Item.getSizeByte()) << "_"
+         << int(Item.getDescriptorByte());
+  }
+
+  if (llvm::Function *F = CGM.getModule().getFunction(Name))
+    return F;
+
+  llvm::SmallVector<ImplicitParamDecl, 4> Params;
+  Params.emplace_back(Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"),
+                      Ctx.VoidPtrTy, ImplicitParamDecl::Other);
+
+  for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
+    char Size = Layout.Items[I].getSizeByte();
+    if (!Size)
+      continue;
+
+    Params.emplace_back(Ctx, nullptr, SourceLocation(),
+                        &Ctx.Idents.get(std::string("arg") + std::to_string(I)),
+                        getOSLogArgType(Ctx, Size), ImplicitParamDecl::Other);
+  }
+
+  FunctionArgList Args;
+  for (auto &P : Params)
+    Args.push_back(&P);
+
+  // When compiling with -Oz, the helper function is linkonce_odr, hidden, and
+  // noinline. This enables the linker to merge identical helper functions
+  // across translation units.
+  bool IsOz = CGM.getCodeGenOpts().OptimizeSize == 2;
+  llvm::GlobalValue::LinkageTypes LT =
+      IsOz ? llvm::GlobalValue::LinkOnceODRLinkage :
+             llvm::GlobalValue::InternalLinkage;
+  const CGFunctionInfo &FI =
+      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
+  llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
+  llvm::Function *Fn = llvm::Function::Create(FuncTy, LT, Name,
+                                              &CGM.getModule());
+  CGM.SetLLVMFunctionAttributes(nullptr, FI, Fn);
+  CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
+  if (IsOz) {
+    Fn->addFnAttr(llvm::Attribute::NoInline);
+    Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
+  }
+
+  auto NL = ApplyDebugLocation::CreateEmpty(*this);
+  StorageClass SC = IsOz ? SC_PrivateExtern : SC_Static;
+  IdentifierInfo *II = &Ctx.Idents.get(Name);
+  FunctionDecl *FD = FunctionDecl::Create(
+      Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
+      Ctx.VoidTy, nullptr, SC, false, false);
+
+  StartFunction(FD, Ctx.VoidTy, Fn, FI, Args);
+
+  // Create a scope with an artificial location for the body of this function.
+  auto AL = ApplyDebugLocation::CreateArtificial(*this);
+
+  CharUnits Offset;
+  Address BufAddr(Builder.CreateLoad(GetAddrOfLocalVar(&Params[0]), "buf"),
+                  BufferAlignment);
+  Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
+                      Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
+  Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
+                      Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
+
+  unsigned I = 1;
+  for (const auto &Item : Layout.Items) {
+    Builder.CreateStore(
+        Builder.getInt8(Item.getDescriptorByte()),
+        Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
+    Builder.CreateStore(
+        Builder.getInt8(Item.getSizeByte()),
+        Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
+
+    CharUnits Size = Item.size();
+    if (!Size.getQuantity())
+      continue;
+
+    Address Arg = GetAddrOfLocalVar(&Params[I]);
+    Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
+    Addr = Builder.CreateBitCast(Addr, Arg.getPointer()->getType(),
+                                 "argDataCast");
+    Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
+    Offset += Size;
+    ++I;
+  }
+
+  FinishFunction();
+
+  return Fn;
+}
+
+RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
+  assert(E.getNumArgs() >= 2 &&
+         "__builtin_os_log_format takes at least 2 arguments");
+  ASTContext &Ctx = getContext();
+  analyze_os_log::OSLogBufferLayout Layout;
+  analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
+  Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
+  llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
+
+  // Ignore argument 1, the format string. It is not currently used.
+  CallArgList Args;
+  Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy);
+
+  for (const auto &Item : Layout.Items) {
+    int Size = Item.getSizeByte();
+    if (!Size)
+      continue;
+
+    llvm::Value *ArgVal;
+
+    if (const Expr *TheExpr = Item.getExpr()) {
+      ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
+
+      // Check if this is a retainable type.
+      if (TheExpr->getType()->isObjCRetainableType()) {
+        assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
+               "Only scalar can be a ObjC retainable type");
+        // Check if the object is constant, if not, save it in
+        // RetainableOperands.
+        if (!isa<Constant>(ArgVal))
+          RetainableOperands.push_back(ArgVal);
+      }
+    } else {
+      ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
+    }
+
+    unsigned ArgValSize =
+        CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
+    llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
+                                                     ArgValSize);
+    ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
+    CanQualType ArgTy = getOSLogArgType(Ctx, Size);
+    // If ArgVal has type x86_fp80, zero-extend ArgVal.
+    ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
+    Args.add(RValue::get(ArgVal), ArgTy);
+  }
+
+  const CGFunctionInfo &FI =
+      CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
+  llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
+      Layout, BufAddr.getAlignment());
+  EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
+
+  // Push a clang.arc.use cleanup for each object in RetainableOperands. The
+  // cleanup will cause the use to appear after the final log call, keeping
+  // the object valid while it's held in the log buffer.  Note that if there's
+  // a release cleanup on the object, it will already be active; since
+  // cleanups are emitted in reverse order, the use will occur before the
+  // object is released.
+  if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount &&
+      CGM.getCodeGenOpts().OptimizationLevel != 0)
+    for (llvm::Value *Object : RetainableOperands)
+      pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), Object);
+
+  return RValue::get(BufAddr.getPointer());
+}
+
 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
                                         unsigned BuiltinID, const CallExpr *E,
                                         ReturnValueSlot ReturnValue) {
@@ -2801,69 +2978,8 @@
     // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
     break;
   }
-  case Builtin::BI__builtin_os_log_format: {
-    assert(E->getNumArgs() >= 2 &&
-           "__builtin_os_log_format takes at least 2 arguments");
-    analyze_os_log::OSLogBufferLayout Layout;
-    analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
-    Address BufAddr = EmitPointerWithAlignment(E->getArg(0));
-    // Ignore argument 1, the format string. It is not currently used.
-    CharUnits Offset;
-    Builder.CreateStore(
-        Builder.getInt8(Layout.getSummaryByte()),
-        Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
-    Builder.CreateStore(
-        Builder.getInt8(Layout.getNumArgsByte()),
-        Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
-
-    llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
-    for (const auto &Item : Layout.Items) {
-      Builder.CreateStore(
-          Builder.getInt8(Item.getDescriptorByte()),
-          Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
-      Builder.CreateStore(
-          Builder.getInt8(Item.getSizeByte()),
-          Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
-      Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset);
-      if (const Expr *TheExpr = Item.getExpr()) {
-        Addr = Builder.CreateElementBitCast(
-            Addr, ConvertTypeForMem(TheExpr->getType()));
-        // Check if this is a retainable type.
-        if (TheExpr->getType()->isObjCRetainableType()) {
-          assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
-                 "Only scalar can be a ObjC retainable type");
-          llvm::Value *SV = EmitScalarExpr(TheExpr, /*Ignore*/ false);
-          RValue RV = RValue::get(SV);
-          LValue LV = MakeAddrLValue(Addr, TheExpr->getType());
-          EmitStoreThroughLValue(RV, LV);
-          // Check if the object is constant, if not, save it in
-          // RetainableOperands.
-          if (!isa<Constant>(SV))
-            RetainableOperands.push_back(SV);
-        } else {
-          EmitAnyExprToMem(TheExpr, Addr, Qualifiers(), /*isInit*/ true);
-        }
-      } else {
-        Addr = Builder.CreateElementBitCast(Addr, Int32Ty);
-        Builder.CreateStore(
-            Builder.getInt32(Item.getConstValue().getQuantity()), Addr);
-      }
-      Offset += Item.size();
-    }
-
-    // Push a clang.arc.use cleanup for each object in RetainableOperands. The
-    // cleanup will cause the use to appear after the final log call, keeping
-    // the object valid while it's held in the log buffer.  Note that if there's
-    // a release cleanup on the object, it will already be active; since
-    // cleanups are emitted in reverse order, the use will occur before the
-    // object is released.
-    if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount &&
-        CGM.getCodeGenOpts().OptimizationLevel != 0)
-      for (llvm::Value *object : RetainableOperands)
-        pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), object);
-
-    return RValue::get(BufAddr.getPointer());
-  }
+  case Builtin::BI__builtin_os_log_format:
+    return emitBuiltinOSLogFormat(*E);
 
   case Builtin::BI__builtin_os_log_format_buffer_size: {
     analyze_os_log::OSLogBufferLayout Layout;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to