ZarkoCA created this revision.
ZarkoCA added reviewers: cebowleratibm, sfertile, PowerPC.
Herald added subscribers: shchenz, kbarton, nemanjai.
ZarkoCA requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

Remove the fatal "complex type is not supported on AIX yet" error and add
special handling of small complex types, following the PPC64 ELF ABI:
generate code that loads the real and imaginary parts from their
right-adjusted varargs slots, packs them into a temporary, and returns a
pointer to that struct.
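
For example, a variadic callee like the following (an illustrative sketch;
the function name is hypothetical) now compiles and loads both parts from
the argument area instead of hitting the fatal error:

  #include <stdarg.h>

  // Adds the real and imaginary parts of the first _Complex float vararg.
  float first_cplx(int n, ...) {
    va_list ap;
    va_start(ap, n);
    _Complex float c = va_arg(ap, _Complex float); // was a fatal error on AIX
    va_end(ap);
    return __real__ c + __imag__ c;
  }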


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D106393

Files:
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGen/aix32-complex-varargs.c
  clang/test/CodeGen/ppc64-varargs-complex.c
  llvm/test/CodeGen/PowerPC/aix32-complex-vararg.ll
  llvm/test/CodeGen/PowerPC/aix64-complex-vararg.ll

Index: llvm/test/CodeGen/PowerPC/aix64-complex-vararg.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/aix64-complex-vararg.ll
@@ -0,0 +1,510 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec \
+; RUN:  -mtriple powerpc64-ibm-aix-xcoff < %s | \
+; RUN: FileCheck --check-prefix=64BIT %s
+
+@cdbl = local_unnamed_addr global { double, double } zeroinitializer, align 8
+@cdbl1 = local_unnamed_addr global { double, double } zeroinitializer, align 8
+@cdbl2 = local_unnamed_addr global { double, double } zeroinitializer, align 8
+@cdbl3 = local_unnamed_addr global { double, double } zeroinitializer, align 8
+@cflt = local_unnamed_addr global { float, float } zeroinitializer, align 4
+@cflt1 = local_unnamed_addr global { float, float } zeroinitializer, align 4
+@cflt2 = local_unnamed_addr global { float, float } zeroinitializer, align 4
+@cflt3 = local_unnamed_addr global { float, float } zeroinitializer, align 4
+@cldbl = local_unnamed_addr global { double, double } zeroinitializer, align 8
+@cldbl1 = local_unnamed_addr global { double, double } zeroinitializer, align 8
+@cldbl2 = local_unnamed_addr global { double, double } zeroinitializer, align 8
+@cldbl3 = local_unnamed_addr global { double, double } zeroinitializer, align 8
+
+define { double, double } @foo1(double %x.coerce0, double %x.coerce1, ...) local_unnamed_addr {
+; 64BIT-LABEL: foo1:
+; 64BIT:       # %bb.0: # %entry
+; 64BIT-NEXT:    ld 3, L..C0(2) # %const.0
+; 64BIT-NEXT:    std 5, 64(1)
+; 64BIT-NEXT:    std 6, 72(1)
+; 64BIT-NEXT:    lfd 4, 64(1)
+; 64BIT-NEXT:    lfd 5, 72(1)
+; 64BIT-NEXT:    std 7, 80(1)
+; 64BIT-NEXT:    std 8, 88(1)
+; 64BIT-NEXT:    lfs 0, 0(3)
+; 64BIT-NEXT:    std 9, 96(1)
+; 64BIT-NEXT:    addi 3, 1, 64
+; 64BIT-NEXT:    std 10, 104(1)
+; 64BIT-NEXT:    fadd 3, 1, 0
+; 64BIT-NEXT:    fadd 0, 2, 0
+; 64BIT-NEXT:    fadd 3, 3, 4
+; 64BIT-NEXT:    fadd 0, 0, 5
+; 64BIT-NEXT:    lfdu 4, 16(3)
+; 64BIT-NEXT:    lfd 5, 88(1)
+; 64BIT-NEXT:    fadd 3, 3, 1
+; 64BIT-NEXT:    fadd 0, 0, 2
+; 64BIT-NEXT:    fadd 3, 3, 4
+; 64BIT-NEXT:    fadd 0, 0, 5
+; 64BIT-NEXT:    lfd 4, 96(1)
+; 64BIT-NEXT:    lfd 5, 104(1)
+; 64BIT-NEXT:    fadd 3, 3, 1
+; 64BIT-NEXT:    fadd 0, 0, 2
+; 64BIT-NEXT:    fadd 3, 3, 4
+; 64BIT-NEXT:    fadd 0, 0, 5
+; 64BIT-NEXT:    lfd 4, 112(1)
+; 64BIT-NEXT:    lfd 5, 120(1)
+; 64BIT-NEXT:    fadd 3, 3, 1
+; 64BIT-NEXT:    fadd 0, 0, 2
+; 64BIT-NEXT:    fadd 3, 3, 4
+; 64BIT-NEXT:    fadd 0, 0, 5
+; 64BIT-NEXT:    fadd 1, 3, 1
+; 64BIT-NEXT:    fadd 0, 0, 2
+; 64BIT-NEXT:    lfd 2, 128(1)
+; 64BIT-NEXT:    lfd 3, 136(1)
+; 64BIT-NEXT:    fadd 1, 1, 2
+; 64BIT-NEXT:    fadd 2, 0, 3
+; 64BIT-NEXT:    blr
+entry:
+  %arg = alloca i8*, align 8
+  %0 = bitcast i8** %arg to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0)
+  call void @llvm.va_start(i8* nonnull %0)
+  %add.r = fadd double %x.coerce0, 0.000000e+00
+  %add.i = fadd double %x.coerce1, 0.000000e+00
+  %argp.cur = load i8*, i8** %arg, align 8
+  %argp.next = getelementptr inbounds i8, i8* %argp.cur, i64 16
+  store i8* %argp.next, i8** %arg, align 8
+  %.realp = bitcast i8* %argp.cur to double*
+  %.real = load double, double* %.realp, align 8
+  %.imagp = getelementptr inbounds i8, i8* %argp.cur, i64 8
+  %1 = bitcast i8* %.imagp to double*
+  %.imag = load double, double* %1, align 8
+  %add.r4 = fadd double %add.r, %.real
+  %add.i5 = fadd double %add.i, %.imag
+  %add.r.1 = fadd double %add.r4, %x.coerce0
+  %add.i.1 = fadd double %add.i5, %x.coerce1
+  %argp.next.1 = getelementptr inbounds i8, i8* %argp.cur, i64 32
+  %.realp.1 = bitcast i8* %argp.next to double*
+  %.real.1 = load double, double* %.realp.1, align 8
+  %.imagp.1 = getelementptr inbounds i8, i8* %argp.cur, i64 24
+  %2 = bitcast i8* %.imagp.1 to double*
+  %.imag.1 = load double, double* %2, align 8
+  %add.r4.1 = fadd double %add.r.1, %.real.1
+  %add.i5.1 = fadd double %add.i.1, %.imag.1
+  %add.r.2 = fadd double %add.r4.1, %x.coerce0
+  %add.i.2 = fadd double %add.i5.1, %x.coerce1
+  %argp.next.2 = getelementptr inbounds i8, i8* %argp.cur, i64 48
+  %.realp.2 = bitcast i8* %argp.next.1 to double*
+  %.real.2 = load double, double* %.realp.2, align 8
+  %.imagp.2 = getelementptr inbounds i8, i8* %argp.cur, i64 40
+  %3 = bitcast i8* %.imagp.2 to double*
+  %.imag.2 = load double, double* %3, align 8
+  %add.r4.2 = fadd double %add.r.2, %.real.2
+  %add.i5.2 = fadd double %add.i.2, %.imag.2
+  %add.r.3 = fadd double %add.r4.2, %x.coerce0
+  %add.i.3 = fadd double %add.i5.2, %x.coerce1
+  %argp.next.3 = getelementptr inbounds i8, i8* %argp.cur, i64 64
+  %.realp.3 = bitcast i8* %argp.next.2 to double*
+  %.real.3 = load double, double* %.realp.3, align 8
+  %.imagp.3 = getelementptr inbounds i8, i8* %argp.cur, i64 56
+  %4 = bitcast i8* %.imagp.3 to double*
+  %.imag.3 = load double, double* %4, align 8
+  %add.r4.3 = fadd double %add.r.3, %.real.3
+  %add.i5.3 = fadd double %add.i.3, %.imag.3
+  %add.r.4 = fadd double %add.r4.3, %x.coerce0
+  %add.i.4 = fadd double %add.i5.3, %x.coerce1
+  %argp.next.4 = getelementptr inbounds i8, i8* %argp.cur, i64 80
+  store i8* %argp.next.4, i8** %arg, align 8
+  %.realp.4 = bitcast i8* %argp.next.3 to double*
+  %.real.4 = load double, double* %.realp.4, align 8
+  %.imagp.4 = getelementptr inbounds i8, i8* %argp.cur, i64 72
+  %5 = bitcast i8* %.imagp.4 to double*
+  %.imag.4 = load double, double* %5, align 8
+  %add.r4.4 = fadd double %add.r.4, %.real.4
+  %add.i5.4 = fadd double %add.i.4, %.imag.4
+  call void @llvm.va_end(i8* nonnull %0)
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0)
+  %.fca.0.insert = insertvalue { double, double } undef, double %add.r4.4, 0
+  %.fca.1.insert = insertvalue { double, double } %.fca.0.insert, double %add.i5.4, 1
+  ret { double, double } %.fca.1.insert
+}
+
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
+
+declare void @llvm.va_start(i8*)
+
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
+
+declare void @llvm.va_end(i8*)
+
+define { float, float } @foo2(float %x.coerce0, float %x.coerce1, ...) local_unnamed_addr {
+; 64BIT-LABEL: foo2:
+; 64BIT:       # %bb.0: # %entry
+; 64BIT-NEXT:    ld 3, L..C1(2) # %const.0
+; 64BIT-NEXT:    std 5, 64(1)
+; 64BIT-NEXT:    std 6, 72(1)
+; 64BIT-NEXT:    lfs 4, 68(1)
+; 64BIT-NEXT:    lfs 5, 76(1)
+; 64BIT-NEXT:    std 7, 80(1)
+; 64BIT-NEXT:    std 8, 88(1)
+; 64BIT-NEXT:    lfs 0, 0(3)
+; 64BIT-NEXT:    std 9, 96(1)
+; 64BIT-NEXT:    std 10, 104(1)
+; 64BIT-NEXT:    fadds 3, 1, 0
+; 64BIT-NEXT:    fadds 0, 2, 0
+; 64BIT-NEXT:    fadds 3, 3, 4
+; 64BIT-NEXT:    fadds 0, 0, 5
+; 64BIT-NEXT:    lfs 4, 84(1)
+; 64BIT-NEXT:    lfs 5, 92(1)
+; 64BIT-NEXT:    fadds 3, 3, 1
+; 64BIT-NEXT:    fadds 0, 0, 2
+; 64BIT-NEXT:    fadds 3, 3, 4
+; 64BIT-NEXT:    fadds 0, 0, 5
+; 64BIT-NEXT:    lfs 4, 100(1)
+; 64BIT-NEXT:    lfs 5, 108(1)
+; 64BIT-NEXT:    fadds 3, 3, 1
+; 64BIT-NEXT:    fadds 0, 0, 2
+; 64BIT-NEXT:    fadds 3, 3, 4
+; 64BIT-NEXT:    fadds 0, 0, 5
+; 64BIT-NEXT:    lfs 4, 116(1)
+; 64BIT-NEXT:    lfs 5, 124(1)
+; 64BIT-NEXT:    fadds 3, 3, 1
+; 64BIT-NEXT:    fadds 0, 0, 2
+; 64BIT-NEXT:    fadds 3, 3, 4
+; 64BIT-NEXT:    fadds 0, 0, 5
+; 64BIT-NEXT:    fadds 1, 3, 1
+; 64BIT-NEXT:    fadds 0, 0, 2
+; 64BIT-NEXT:    lfs 2, 132(1)
+; 64BIT-NEXT:    lfs 3, 140(1)
+; 64BIT-NEXT:    fadds 1, 1, 2
+; 64BIT-NEXT:    fadds 2, 0, 3
+; 64BIT-NEXT:    blr
+entry:
+  %arg = alloca i8*, align 8
+  %0 = bitcast i8** %arg to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0)
+  call void @llvm.va_start(i8* nonnull %0)
+  %add.r = fadd float %x.coerce0, 0.000000e+00
+  %add.i = fadd float %x.coerce1, 0.000000e+00
+  %argp.cur = load i8*, i8** %arg, align 8
+  %argp.next = getelementptr inbounds i8, i8* %argp.cur, i64 16
+  store i8* %argp.next, i8** %arg, align 8
+  %1 = getelementptr inbounds i8, i8* %argp.cur, i64 4
+  %2 = getelementptr inbounds i8, i8* %argp.cur, i64 12
+  %3 = bitcast i8* %1 to float*
+  %4 = bitcast i8* %2 to float*
+  %.vareal = load float, float* %3, align 4
+  %.vaimag = load float, float* %4, align 4
+  %add.r6 = fadd float %add.r, %.vareal
+  %add.i7 = fadd float %add.i, %.vaimag
+  %add.r.1 = fadd float %add.r6, %x.coerce0
+  %add.i.1 = fadd float %add.i7, %x.coerce1
+  %5 = getelementptr inbounds i8, i8* %argp.cur, i64 20
+  %6 = getelementptr inbounds i8, i8* %argp.cur, i64 28
+  %7 = bitcast i8* %5 to float*
+  %8 = bitcast i8* %6 to float*
+  %.vareal.1 = load float, float* %7, align 4
+  %.vaimag.1 = load float, float* %8, align 4
+  %add.r6.1 = fadd float %add.r.1, %.vareal.1
+  %add.i7.1 = fadd float %add.i.1, %.vaimag.1
+  %add.r.2 = fadd float %add.r6.1, %x.coerce0
+  %add.i.2 = fadd float %add.i7.1, %x.coerce1
+  %9 = getelementptr inbounds i8, i8* %argp.cur, i64 36
+  %10 = getelementptr inbounds i8, i8* %argp.cur, i64 44
+  %11 = bitcast i8* %9 to float*
+  %12 = bitcast i8* %10 to float*
+  %.vareal.2 = load float, float* %11, align 4
+  %.vaimag.2 = load float, float* %12, align 4
+  %add.r6.2 = fadd float %add.r.2, %.vareal.2
+  %add.i7.2 = fadd float %add.i.2, %.vaimag.2
+  %add.r.3 = fadd float %add.r6.2, %x.coerce0
+  %add.i.3 = fadd float %add.i7.2, %x.coerce1
+  %13 = getelementptr inbounds i8, i8* %argp.cur, i64 52
+  %14 = getelementptr inbounds i8, i8* %argp.cur, i64 60
+  %15 = bitcast i8* %13 to float*
+  %16 = bitcast i8* %14 to float*
+  %.vareal.3 = load float, float* %15, align 4
+  %.vaimag.3 = load float, float* %16, align 4
+  %add.r6.3 = fadd float %add.r.3, %.vareal.3
+  %add.i7.3 = fadd float %add.i.3, %.vaimag.3
+  %add.r.4 = fadd float %add.r6.3, %x.coerce0
+  %add.i.4 = fadd float %add.i7.3, %x.coerce1
+  %argp.next.4 = getelementptr inbounds i8, i8* %argp.cur, i64 80
+  store i8* %argp.next.4, i8** %arg, align 8
+  %17 = getelementptr inbounds i8, i8* %argp.cur, i64 68
+  %18 = getelementptr inbounds i8, i8* %argp.cur, i64 76
+  %19 = bitcast i8* %17 to float*
+  %20 = bitcast i8* %18 to float*
+  %.vareal.4 = load float, float* %19, align 4
+  %.vaimag.4 = load float, float* %20, align 4
+  %add.r6.4 = fadd float %add.r.4, %.vareal.4
+  %add.i7.4 = fadd float %add.i.4, %.vaimag.4
+  call void @llvm.va_end(i8* nonnull %0)
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0)
+  %.fca.0.insert = insertvalue { float, float } undef, float %add.r6.4, 0
+  %.fca.1.insert = insertvalue { float, float } %.fca.0.insert, float %add.i7.4, 1
+  ret { float, float } %.fca.1.insert
+}
+
+define { double, double } @foo3(double %x.coerce0, double %x.coerce1, ...) local_unnamed_addr {
+; 64BIT-LABEL: foo3:
+; 64BIT:       # %bb.0: # %entry
+; 64BIT-NEXT:    ld 3, L..C2(2) # %const.0
+; 64BIT-NEXT:    std 5, 64(1)
+; 64BIT-NEXT:    std 6, 72(1)
+; 64BIT-NEXT:    lfd 4, 64(1)
+; 64BIT-NEXT:    lfd 5, 72(1)
+; 64BIT-NEXT:    std 7, 80(1)
+; 64BIT-NEXT:    std 8, 88(1)
+; 64BIT-NEXT:    lfs 0, 0(3)
+; 64BIT-NEXT:    std 9, 96(1)
+; 64BIT-NEXT:    addi 3, 1, 64
+; 64BIT-NEXT:    std 10, 104(1)
+; 64BIT-NEXT:    fadd 3, 1, 0
+; 64BIT-NEXT:    fadd 0, 2, 0
+; 64BIT-NEXT:    fadd 3, 3, 4
+; 64BIT-NEXT:    fadd 0, 0, 5
+; 64BIT-NEXT:    lfdu 4, 16(3)
+; 64BIT-NEXT:    lfd 5, 88(1)
+; 64BIT-NEXT:    fadd 3, 3, 1
+; 64BIT-NEXT:    fadd 0, 0, 2
+; 64BIT-NEXT:    fadd 3, 3, 4
+; 64BIT-NEXT:    fadd 0, 0, 5
+; 64BIT-NEXT:    lfd 4, 96(1)
+; 64BIT-NEXT:    lfd 5, 104(1)
+; 64BIT-NEXT:    fadd 3, 3, 1
+; 64BIT-NEXT:    fadd 0, 0, 2
+; 64BIT-NEXT:    fadd 3, 3, 4
+; 64BIT-NEXT:    fadd 0, 0, 5
+; 64BIT-NEXT:    lfd 4, 112(1)
+; 64BIT-NEXT:    lfd 5, 120(1)
+; 64BIT-NEXT:    fadd 3, 3, 1
+; 64BIT-NEXT:    fadd 0, 0, 2
+; 64BIT-NEXT:    fadd 3, 3, 4
+; 64BIT-NEXT:    fadd 0, 0, 5
+; 64BIT-NEXT:    fadd 1, 3, 1
+; 64BIT-NEXT:    fadd 0, 0, 2
+; 64BIT-NEXT:    lfd 2, 128(1)
+; 64BIT-NEXT:    lfd 3, 136(1)
+; 64BIT-NEXT:    fadd 1, 1, 2
+; 64BIT-NEXT:    fadd 2, 0, 3
+; 64BIT-NEXT:    blr
+entry:
+  %arg = alloca i8*, align 8
+  %0 = bitcast i8** %arg to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0)
+  call void @llvm.va_start(i8* nonnull %0)
+  %add.r = fadd double %x.coerce0, 0.000000e+00
+  %add.i = fadd double %x.coerce1, 0.000000e+00
+  %argp.cur = load i8*, i8** %arg, align 8
+  %argp.next = getelementptr inbounds i8, i8* %argp.cur, i64 16
+  store i8* %argp.next, i8** %arg, align 8
+  %.realp = bitcast i8* %argp.cur to double*
+  %.real = load double, double* %.realp, align 8
+  %.imagp = getelementptr inbounds i8, i8* %argp.cur, i64 8
+  %1 = bitcast i8* %.imagp to double*
+  %.imag = load double, double* %1, align 8
+  %add.r4 = fadd double %add.r, %.real
+  %add.i5 = fadd double %add.i, %.imag
+  %add.r.1 = fadd double %add.r4, %x.coerce0
+  %add.i.1 = fadd double %add.i5, %x.coerce1
+  %argp.next.1 = getelementptr inbounds i8, i8* %argp.cur, i64 32
+  %.realp.1 = bitcast i8* %argp.next to double*
+  %.real.1 = load double, double* %.realp.1, align 8
+  %.imagp.1 = getelementptr inbounds i8, i8* %argp.cur, i64 24
+  %2 = bitcast i8* %.imagp.1 to double*
+  %.imag.1 = load double, double* %2, align 8
+  %add.r4.1 = fadd double %add.r.1, %.real.1
+  %add.i5.1 = fadd double %add.i.1, %.imag.1
+  %add.r.2 = fadd double %add.r4.1, %x.coerce0
+  %add.i.2 = fadd double %add.i5.1, %x.coerce1
+  %argp.next.2 = getelementptr inbounds i8, i8* %argp.cur, i64 48
+  %.realp.2 = bitcast i8* %argp.next.1 to double*
+  %.real.2 = load double, double* %.realp.2, align 8
+  %.imagp.2 = getelementptr inbounds i8, i8* %argp.cur, i64 40
+  %3 = bitcast i8* %.imagp.2 to double*
+  %.imag.2 = load double, double* %3, align 8
+  %add.r4.2 = fadd double %add.r.2, %.real.2
+  %add.i5.2 = fadd double %add.i.2, %.imag.2
+  %add.r.3 = fadd double %add.r4.2, %x.coerce0
+  %add.i.3 = fadd double %add.i5.2, %x.coerce1
+  %argp.next.3 = getelementptr inbounds i8, i8* %argp.cur, i64 64
+  %.realp.3 = bitcast i8* %argp.next.2 to double*
+  %.real.3 = load double, double* %.realp.3, align 8
+  %.imagp.3 = getelementptr inbounds i8, i8* %argp.cur, i64 56
+  %4 = bitcast i8* %.imagp.3 to double*
+  %.imag.3 = load double, double* %4, align 8
+  %add.r4.3 = fadd double %add.r.3, %.real.3
+  %add.i5.3 = fadd double %add.i.3, %.imag.3
+  %add.r.4 = fadd double %add.r4.3, %x.coerce0
+  %add.i.4 = fadd double %add.i5.3, %x.coerce1
+  %argp.next.4 = getelementptr inbounds i8, i8* %argp.cur, i64 80
+  store i8* %argp.next.4, i8** %arg, align 8
+  %.realp.4 = bitcast i8* %argp.next.3 to double*
+  %.real.4 = load double, double* %.realp.4, align 8
+  %.imagp.4 = getelementptr inbounds i8, i8* %argp.cur, i64 72
+  %5 = bitcast i8* %.imagp.4 to double*
+  %.imag.4 = load double, double* %5, align 8
+  %add.r4.4 = fadd double %add.r.4, %.real.4
+  %add.i5.4 = fadd double %add.i.4, %.imag.4
+  call void @llvm.va_end(i8* nonnull %0)
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0)
+  %.fca.0.insert = insertvalue { double, double } undef, double %add.r4.4, 0
+  %.fca.1.insert = insertvalue { double, double } %.fca.0.insert, double %add.i5.4, 1
+  ret { double, double } %.fca.1.insert
+}
+
+define void @bar1() local_unnamed_addr {
+; 64BIT-LABEL: bar1:
+; 64BIT:       # %bb.0: # %entry
+; 64BIT-NEXT:    mflr 0
+; 64BIT-NEXT:    std 0, 16(1)
+; 64BIT-NEXT:    stdu 1, -176(1)
+; 64BIT-NEXT:    ld 3, L..C3(2) # @cdbl
+; 64BIT-NEXT:    ld 5, L..C4(2) # @cdbl1
+; 64BIT-NEXT:    ld 7, L..C5(2) # @cdbl2
+; 64BIT-NEXT:    ld 9, L..C6(2) # @cdbl3
+; 64BIT-NEXT:    lfd 1, 0(3)
+; 64BIT-NEXT:    stfd 1, 112(1)
+; 64BIT-NEXT:    lfd 2, 8(3)
+; 64BIT-NEXT:    ld 3, 112(1)
+; 64BIT-NEXT:    stfd 2, 144(1)
+; 64BIT-NEXT:    lfd 3, 0(5)
+; 64BIT-NEXT:    ld 4, 144(1)
+; 64BIT-NEXT:    stfd 3, 120(1)
+; 64BIT-NEXT:    lfd 4, 8(5)
+; 64BIT-NEXT:    ld 5, 120(1)
+; 64BIT-NEXT:    stfd 4, 152(1)
+; 64BIT-NEXT:    lfd 5, 0(7)
+; 64BIT-NEXT:    ld 6, 152(1)
+; 64BIT-NEXT:    stfd 5, 128(1)
+; 64BIT-NEXT:    lfd 6, 8(7)
+; 64BIT-NEXT:    ld 7, 128(1)
+; 64BIT-NEXT:    stfd 6, 160(1)
+; 64BIT-NEXT:    lfd 7, 0(9)
+; 64BIT-NEXT:    ld 8, 160(1)
+; 64BIT-NEXT:    stfd 7, 136(1)
+; 64BIT-NEXT:    lfd 8, 8(9)
+; 64BIT-NEXT:    ld 9, 136(1)
+; 64BIT-NEXT:    stfd 8, 168(1)
+; 64BIT-NEXT:    ld 10, 168(1)
+; 64BIT-NEXT:    bl .foo1
+; 64BIT-NEXT:    nop
+; 64BIT-NEXT:    addi 1, 1, 176
+; 64BIT-NEXT:    ld 0, 16(1)
+; 64BIT-NEXT:    mtlr 0
+; 64BIT-NEXT:    blr
+entry:
+  %cdbl.real = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cdbl, i64 0, i32 0), align 8
+  %cdbl.imag = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cdbl, i64 0, i32 1), align 8
+  %cdbl1.real = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cdbl1, i64 0, i32 0), align 8
+  %cdbl1.imag = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cdbl1, i64 0, i32 1), align 8
+  %cdbl2.real = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cdbl2, i64 0, i32 0), align 8
+  %cdbl2.imag = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cdbl2, i64 0, i32 1), align 8
+  %cdbl3.real = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cdbl3, i64 0, i32 0), align 8
+  %cdbl3.imag = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cdbl3, i64 0, i32 1), align 8
+  %call = tail call { double, double } (double, double, ...) @foo1(double %cdbl.real, double %cdbl.imag, double %cdbl1.real, double %cdbl1.imag, double %cdbl2.real, double %cdbl2.imag, double %cdbl3.real, double %cdbl3.imag)
+  ret void
+}
+
+define void @bar2() local_unnamed_addr {
+; 64BIT-LABEL: bar2:
+; 64BIT:       # %bb.0: # %entry
+; 64BIT-NEXT:    mflr 0
+; 64BIT-NEXT:    std 0, 16(1)
+; 64BIT-NEXT:    stdu 1, -144(1)
+; 64BIT-NEXT:    ld 3, L..C7(2) # @cflt
+; 64BIT-NEXT:    ld 5, L..C8(2) # @cflt1
+; 64BIT-NEXT:    ld 7, L..C9(2) # @cflt2
+; 64BIT-NEXT:    ld 9, L..C10(2) # @cflt3
+; 64BIT-NEXT:    lfs 1, 0(3)
+; 64BIT-NEXT:    stfs 1, 112(1)
+; 64BIT-NEXT:    lfs 2, 4(3)
+; 64BIT-NEXT:    lwz 3, 112(1)
+; 64BIT-NEXT:    stfs 2, 128(1)
+; 64BIT-NEXT:    lfs 3, 0(5)
+; 64BIT-NEXT:    lwz 4, 128(1)
+; 64BIT-NEXT:    stfs 3, 116(1)
+; 64BIT-NEXT:    lfs 4, 4(5)
+; 64BIT-NEXT:    lwz 5, 116(1)
+; 64BIT-NEXT:    stfs 4, 132(1)
+; 64BIT-NEXT:    lfs 5, 0(7)
+; 64BIT-NEXT:    lwz 6, 132(1)
+; 64BIT-NEXT:    stfs 5, 120(1)
+; 64BIT-NEXT:    lfs 6, 4(7)
+; 64BIT-NEXT:    lwz 7, 120(1)
+; 64BIT-NEXT:    stfs 6, 136(1)
+; 64BIT-NEXT:    lfs 7, 0(9)
+; 64BIT-NEXT:    lwz 8, 136(1)
+; 64BIT-NEXT:    stfs 7, 124(1)
+; 64BIT-NEXT:    lfs 8, 4(9)
+; 64BIT-NEXT:    lwz 9, 124(1)
+; 64BIT-NEXT:    stfs 8, 140(1)
+; 64BIT-NEXT:    lwz 10, 140(1)
+; 64BIT-NEXT:    bl .foo2
+; 64BIT-NEXT:    nop
+; 64BIT-NEXT:    addi 1, 1, 144
+; 64BIT-NEXT:    ld 0, 16(1)
+; 64BIT-NEXT:    mtlr 0
+; 64BIT-NEXT:    blr
+entry:
+  %cflt.real = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @cflt, i64 0, i32 0), align 4
+  %cflt.imag = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @cflt, i64 0, i32 1), align 4
+  %cflt1.real = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @cflt1, i64 0, i32 0), align 4
+  %cflt1.imag = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @cflt1, i64 0, i32 1), align 4
+  %cflt2.real = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @cflt2, i64 0, i32 0), align 4
+  %cflt2.imag = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @cflt2, i64 0, i32 1), align 4
+  %cflt3.real = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @cflt3, i64 0, i32 0), align 4
+  %cflt3.imag = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @cflt3, i64 0, i32 1), align 4
+  %call = tail call { float, float } (float, float, ...) @foo2(float %cflt.real, float %cflt.imag, float %cflt1.real, float %cflt1.imag, float %cflt2.real, float %cflt2.imag, float %cflt3.real, float %cflt3.imag)
+  ret void
+}
+
+define void @bar3() local_unnamed_addr {
+; 64BIT-LABEL: bar3:
+; 64BIT:       # %bb.0: # %entry
+; 64BIT-NEXT:    mflr 0
+; 64BIT-NEXT:    std 0, 16(1)
+; 64BIT-NEXT:    stdu 1, -176(1)
+; 64BIT-NEXT:    ld 3, L..C11(2) # @cldbl
+; 64BIT-NEXT:    ld 5, L..C12(2) # @cldbl1
+; 64BIT-NEXT:    ld 7, L..C13(2) # @cldbl2
+; 64BIT-NEXT:    ld 9, L..C14(2) # @cldbl3
+; 64BIT-NEXT:    lfd 1, 0(3)
+; 64BIT-NEXT:    stfd 1, 112(1)
+; 64BIT-NEXT:    lfd 2, 8(3)
+; 64BIT-NEXT:    ld 3, 112(1)
+; 64BIT-NEXT:    stfd 2, 144(1)
+; 64BIT-NEXT:    lfd 3, 0(5)
+; 64BIT-NEXT:    ld 4, 144(1)
+; 64BIT-NEXT:    stfd 3, 120(1)
+; 64BIT-NEXT:    lfd 4, 8(5)
+; 64BIT-NEXT:    ld 5, 120(1)
+; 64BIT-NEXT:    stfd 4, 152(1)
+; 64BIT-NEXT:    lfd 5, 0(7)
+; 64BIT-NEXT:    ld 6, 152(1)
+; 64BIT-NEXT:    stfd 5, 128(1)
+; 64BIT-NEXT:    lfd 6, 8(7)
+; 64BIT-NEXT:    ld 7, 128(1)
+; 64BIT-NEXT:    stfd 6, 160(1)
+; 64BIT-NEXT:    lfd 7, 0(9)
+; 64BIT-NEXT:    ld 8, 160(1)
+; 64BIT-NEXT:    stfd 7, 136(1)
+; 64BIT-NEXT:    lfd 8, 8(9)
+; 64BIT-NEXT:    ld 9, 136(1)
+; 64BIT-NEXT:    stfd 8, 168(1)
+; 64BIT-NEXT:    ld 10, 168(1)
+; 64BIT-NEXT:    bl .foo3
+; 64BIT-NEXT:    nop
+; 64BIT-NEXT:    addi 1, 1, 176
+; 64BIT-NEXT:    ld 0, 16(1)
+; 64BIT-NEXT:    mtlr 0
+; 64BIT-NEXT:    blr
+entry:
+  %cldbl.real = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cldbl, i64 0, i32 0), align 8
+  %cldbl.imag = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cldbl, i64 0, i32 1), align 8
+  %cldbl1.real = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cldbl1, i64 0, i32 0), align 8
+  %cldbl1.imag = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cldbl1, i64 0, i32 1), align 8
+  %cldbl2.real = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cldbl2, i64 0, i32 0), align 8
+  %cldbl2.imag = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cldbl2, i64 0, i32 1), align 8
+  %cldbl3.real = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cldbl3, i64 0, i32 0), align 8
+  %cldbl3.imag = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cldbl3, i64 0, i32 1), align 8
+  %call = tail call { double, double } (double, double, ...) @foo3(double %cldbl.real, double %cldbl.imag, double %cldbl1.real, double %cldbl1.imag, double %cldbl2.real, double %cldbl2.imag, double %cldbl3.real, double %cldbl3.imag)
+  ret void
+}
+
Index: llvm/test/CodeGen/PowerPC/aix32-complex-vararg.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/aix32-complex-vararg.ll
@@ -0,0 +1,503 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec \
+; RUN:  -mtriple powerpc-ibm-aix-xcoff < %s | \
+; RUN: FileCheck --check-prefix=32BIT %s
+
+@cdbl = local_unnamed_addr global { double, double } zeroinitializer, align 8
+@cdbl1 = local_unnamed_addr global { double, double } zeroinitializer, align 8
+@cdbl2 = local_unnamed_addr global { double, double } zeroinitializer, align 8
+@cdbl3 = local_unnamed_addr global { double, double } zeroinitializer, align 8
+@cflt = local_unnamed_addr global { float, float } zeroinitializer, align 4
+@cflt1 = local_unnamed_addr global { float, float } zeroinitializer, align 4
+@cflt2 = local_unnamed_addr global { float, float } zeroinitializer, align 4
+@cflt3 = local_unnamed_addr global { float, float } zeroinitializer, align 4
+@cldbl = local_unnamed_addr global { double, double } zeroinitializer, align 8
+@cldbl1 = local_unnamed_addr global { double, double } zeroinitializer, align 8
+@cldbl2 = local_unnamed_addr global { double, double } zeroinitializer, align 8
+@cldbl3 = local_unnamed_addr global { double, double } zeroinitializer, align 8
+
+define { double, double } @foo1(double %x.coerce0, double %x.coerce1, ...) local_unnamed_addr {
+; 32BIT-LABEL: foo1:
+; 32BIT:       # %bb.0: # %entry
+; 32BIT-NEXT:    lwz 3, L..C0(2) # %const.0
+; 32BIT-NEXT:    stw 7, 40(1)
+; 32BIT-NEXT:    stw 8, 44(1)
+; 32BIT-NEXT:    stw 9, 48(1)
+; 32BIT-NEXT:    stw 10, 52(1)
+; 32BIT-NEXT:    lfd 4, 40(1)
+; 32BIT-NEXT:    lfd 5, 48(1)
+; 32BIT-NEXT:    lfs 0, 0(3)
+; 32BIT-NEXT:    fadd 3, 1, 0
+; 32BIT-NEXT:    fadd 0, 2, 0
+; 32BIT-NEXT:    fadd 3, 3, 4
+; 32BIT-NEXT:    fadd 0, 0, 5
+; 32BIT-NEXT:    lfd 4, 56(1)
+; 32BIT-NEXT:    lfd 5, 64(1)
+; 32BIT-NEXT:    fadd 3, 3, 1
+; 32BIT-NEXT:    fadd 0, 0, 2
+; 32BIT-NEXT:    fadd 3, 3, 4
+; 32BIT-NEXT:    fadd 0, 0, 5
+; 32BIT-NEXT:    lfd 4, 72(1)
+; 32BIT-NEXT:    lfd 5, 80(1)
+; 32BIT-NEXT:    fadd 3, 3, 1
+; 32BIT-NEXT:    fadd 0, 0, 2
+; 32BIT-NEXT:    fadd 3, 3, 4
+; 32BIT-NEXT:    fadd 0, 0, 5
+; 32BIT-NEXT:    lfd 4, 88(1)
+; 32BIT-NEXT:    lfd 5, 96(1)
+; 32BIT-NEXT:    fadd 3, 3, 1
+; 32BIT-NEXT:    fadd 0, 0, 2
+; 32BIT-NEXT:    fadd 3, 3, 4
+; 32BIT-NEXT:    fadd 0, 0, 5
+; 32BIT-NEXT:    fadd 1, 3, 1
+; 32BIT-NEXT:    fadd 0, 0, 2
+; 32BIT-NEXT:    lfd 2, 104(1)
+; 32BIT-NEXT:    lfd 3, 112(1)
+; 32BIT-NEXT:    fadd 1, 1, 2
+; 32BIT-NEXT:    fadd 2, 0, 3
+; 32BIT-NEXT:    blr
+entry:
+  %arg = alloca i8*, align 4
+  %0 = bitcast i8** %arg to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0)
+  call void @llvm.va_start(i8* nonnull %0)
+  %arg.promoted = load i8*, i8** %arg, align 4
+  %add.r = fadd double %x.coerce0, 0.000000e+00
+  %add.i = fadd double %x.coerce1, 0.000000e+00
+  %argp.next = getelementptr inbounds i8, i8* %arg.promoted, i32 16
+  %.realp = bitcast i8* %arg.promoted to double*
+  %.real = load double, double* %.realp, align 4
+  %.imagp = getelementptr inbounds i8, i8* %arg.promoted, i32 8
+  %1 = bitcast i8* %.imagp to double*
+  %.imag = load double, double* %1, align 4
+  %add.r4 = fadd double %add.r, %.real
+  %add.i5 = fadd double %add.i, %.imag
+  %add.r.1 = fadd double %add.r4, %x.coerce0
+  %add.i.1 = fadd double %add.i5, %x.coerce1
+  %argp.next.1 = getelementptr inbounds i8, i8* %arg.promoted, i32 32
+  %.realp.1 = bitcast i8* %argp.next to double*
+  %.real.1 = load double, double* %.realp.1, align 4
+  %.imagp.1 = getelementptr inbounds i8, i8* %arg.promoted, i32 24
+  %2 = bitcast i8* %.imagp.1 to double*
+  %.imag.1 = load double, double* %2, align 4
+  %add.r4.1 = fadd double %add.r.1, %.real.1
+  %add.i5.1 = fadd double %add.i.1, %.imag.1
+  %add.r.2 = fadd double %add.r4.1, %x.coerce0
+  %add.i.2 = fadd double %add.i5.1, %x.coerce1
+  %argp.next.2 = getelementptr inbounds i8, i8* %arg.promoted, i32 48
+  %.realp.2 = bitcast i8* %argp.next.1 to double*
+  %.real.2 = load double, double* %.realp.2, align 4
+  %.imagp.2 = getelementptr inbounds i8, i8* %arg.promoted, i32 40
+  %3 = bitcast i8* %.imagp.2 to double*
+  %.imag.2 = load double, double* %3, align 4
+  %add.r4.2 = fadd double %add.r.2, %.real.2
+  %add.i5.2 = fadd double %add.i.2, %.imag.2
+  %add.r.3 = fadd double %add.r4.2, %x.coerce0
+  %add.i.3 = fadd double %add.i5.2, %x.coerce1
+  %argp.next.3 = getelementptr inbounds i8, i8* %arg.promoted, i32 64
+  %.realp.3 = bitcast i8* %argp.next.2 to double*
+  %.real.3 = load double, double* %.realp.3, align 4
+  %.imagp.3 = getelementptr inbounds i8, i8* %arg.promoted, i32 56
+  %4 = bitcast i8* %.imagp.3 to double*
+  %.imag.3 = load double, double* %4, align 4
+  %add.r4.3 = fadd double %add.r.3, %.real.3
+  %add.i5.3 = fadd double %add.i.3, %.imag.3
+  %add.r.4 = fadd double %add.r4.3, %x.coerce0
+  %add.i.4 = fadd double %add.i5.3, %x.coerce1
+  %.realp.4 = bitcast i8* %argp.next.3 to double*
+  %.real.4 = load double, double* %.realp.4, align 4
+  %.imagp.4 = getelementptr inbounds i8, i8* %arg.promoted, i32 72
+  %5 = bitcast i8* %.imagp.4 to double*
+  %.imag.4 = load double, double* %5, align 4
+  %add.r4.4 = fadd double %add.r.4, %.real.4
+  %add.i5.4 = fadd double %add.i.4, %.imag.4
+  %scevgep = getelementptr i8, i8* %arg.promoted, i32 80
+  store i8* %scevgep, i8** %arg, align 4
+  call void @llvm.va_end(i8* nonnull %0)
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0)
+  %.fca.0.insert = insertvalue { double, double } undef, double %add.r4.4, 0
+  %.fca.1.insert = insertvalue { double, double } %.fca.0.insert, double %add.i5.4, 1
+  ret { double, double } %.fca.1.insert
+}
+
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
+
+declare void @llvm.va_start(i8*)
+
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
+
+declare void @llvm.va_end(i8*)
+
+define { float, float } @foo2(float %x.coerce0, float %x.coerce1, ...) local_unnamed_addr {
+; 32BIT-LABEL: foo2:
+; 32BIT:       # %bb.0: # %entry
+; 32BIT-NEXT:    lwz 3, L..C1(2) # %const.0
+; 32BIT-NEXT:    stw 5, 32(1)
+; 32BIT-NEXT:    stw 6, 36(1)
+; 32BIT-NEXT:    lfs 4, 32(1)
+; 32BIT-NEXT:    lfs 5, 36(1)
+; 32BIT-NEXT:    stw 7, 40(1)
+; 32BIT-NEXT:    stw 8, 44(1)
+; 32BIT-NEXT:    lfs 0, 0(3)
+; 32BIT-NEXT:    stw 9, 48(1)
+; 32BIT-NEXT:    addi 3, 1, 32
+; 32BIT-NEXT:    stw 10, 52(1)
+; 32BIT-NEXT:    fadds 3, 1, 0
+; 32BIT-NEXT:    fadds 0, 2, 0
+; 32BIT-NEXT:    fadds 3, 3, 4
+; 32BIT-NEXT:    fadds 0, 0, 5
+; 32BIT-NEXT:    lfsu 4, 8(3)
+; 32BIT-NEXT:    lfs 5, 44(1)
+; 32BIT-NEXT:    fadds 3, 3, 1
+; 32BIT-NEXT:    fadds 0, 0, 2
+; 32BIT-NEXT:    fadds 3, 3, 4
+; 32BIT-NEXT:    fadds 0, 0, 5
+; 32BIT-NEXT:    lfs 4, 48(1)
+; 32BIT-NEXT:    lfs 5, 52(1)
+; 32BIT-NEXT:    fadds 3, 3, 1
+; 32BIT-NEXT:    fadds 0, 0, 2
+; 32BIT-NEXT:    fadds 3, 3, 4
+; 32BIT-NEXT:    fadds 0, 0, 5
+; 32BIT-NEXT:    lfs 4, 56(1)
+; 32BIT-NEXT:    lfs 5, 60(1)
+; 32BIT-NEXT:    fadds 3, 3, 1
+; 32BIT-NEXT:    fadds 0, 0, 2
+; 32BIT-NEXT:    fadds 3, 3, 4
+; 32BIT-NEXT:    fadds 0, 0, 5
+; 32BIT-NEXT:    fadds 1, 3, 1
+; 32BIT-NEXT:    fadds 0, 0, 2
+; 32BIT-NEXT:    lfs 2, 64(1)
+; 32BIT-NEXT:    lfs 3, 68(1)
+; 32BIT-NEXT:    fadds 1, 1, 2
+; 32BIT-NEXT:    fadds 2, 0, 3
+; 32BIT-NEXT:    blr
+entry:
+  %arg = alloca i8*, align 4
+  %0 = bitcast i8** %arg to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0)
+  call void @llvm.va_start(i8* nonnull %0)
+  %add.r = fadd float %x.coerce0, 0.000000e+00
+  %add.i = fadd float %x.coerce1, 0.000000e+00
+  %argp.cur = load i8*, i8** %arg, align 4
+  %argp.next = getelementptr inbounds i8, i8* %argp.cur, i32 8
+  store i8* %argp.next, i8** %arg, align 4
+  %.realp = bitcast i8* %argp.cur to float*
+  %.real = load float, float* %.realp, align 4
+  %.imagp = getelementptr inbounds i8, i8* %argp.cur, i32 4
+  %1 = bitcast i8* %.imagp to float*
+  %.imag = load float, float* %1, align 4
+  %add.r4 = fadd float %add.r, %.real
+  %add.i5 = fadd float %add.i, %.imag
+  %add.r.1 = fadd float %add.r4, %x.coerce0
+  %add.i.1 = fadd float %add.i5, %x.coerce1
+  %argp.next.1 = getelementptr inbounds i8, i8* %argp.cur, i32 16
+  %.realp.1 = bitcast i8* %argp.next to float*
+  %.real.1 = load float, float* %.realp.1, align 4
+  %.imagp.1 = getelementptr inbounds i8, i8* %argp.cur, i32 12
+  %2 = bitcast i8* %.imagp.1 to float*
+  %.imag.1 = load float, float* %2, align 4
+  %add.r4.1 = fadd float %add.r.1, %.real.1
+  %add.i5.1 = fadd float %add.i.1, %.imag.1
+  %add.r.2 = fadd float %add.r4.1, %x.coerce0
+  %add.i.2 = fadd float %add.i5.1, %x.coerce1
+  %argp.next.2 = getelementptr inbounds i8, i8* %argp.cur, i32 24
+  %.realp.2 = bitcast i8* %argp.next.1 to float*
+  %.real.2 = load float, float* %.realp.2, align 4
+  %.imagp.2 = getelementptr inbounds i8, i8* %argp.cur, i32 20
+  %3 = bitcast i8* %.imagp.2 to float*
+  %.imag.2 = load float, float* %3, align 4
+  %add.r4.2 = fadd float %add.r.2, %.real.2
+  %add.i5.2 = fadd float %add.i.2, %.imag.2
+  %add.r.3 = fadd float %add.r4.2, %x.coerce0
+  %add.i.3 = fadd float %add.i5.2, %x.coerce1
+  %argp.next.3 = getelementptr inbounds i8, i8* %argp.cur, i32 32
+  %.realp.3 = bitcast i8* %argp.next.2 to float*
+  %.real.3 = load float, float* %.realp.3, align 4
+  %.imagp.3 = getelementptr inbounds i8, i8* %argp.cur, i32 28
+  %4 = bitcast i8* %.imagp.3 to float*
+  %.imag.3 = load float, float* %4, align 4
+  %add.r4.3 = fadd float %add.r.3, %.real.3
+  %add.i5.3 = fadd float %add.i.3, %.imag.3
+  %add.r.4 = fadd float %add.r4.3, %x.coerce0
+  %add.i.4 = fadd float %add.i5.3, %x.coerce1
+  %argp.next.4 = getelementptr inbounds i8, i8* %argp.cur, i32 40
+  store i8* %argp.next.4, i8** %arg, align 4
+  %.realp.4 = bitcast i8* %argp.next.3 to float*
+  %.real.4 = load float, float* %.realp.4, align 4
+  %.imagp.4 = getelementptr inbounds i8, i8* %argp.cur, i32 36
+  %5 = bitcast i8* %.imagp.4 to float*
+  %.imag.4 = load float, float* %5, align 4
+  %add.r4.4 = fadd float %add.r.4, %.real.4
+  %add.i5.4 = fadd float %add.i.4, %.imag.4
+  call void @llvm.va_end(i8* nonnull %0)
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0)
+  %.fca.0.insert = insertvalue { float, float } undef, float %add.r4.4, 0
+  %.fca.1.insert = insertvalue { float, float } %.fca.0.insert, float %add.i5.4, 1
+  ret { float, float } %.fca.1.insert
+}
+
+define { double, double } @foo3(double %x.coerce0, double %x.coerce1, ...) local_unnamed_addr {
+; 32BIT-LABEL: foo3:
+; 32BIT:       # %bb.0: # %entry
+; 32BIT-NEXT:    lwz 3, L..C2(2) # %const.0
+; 32BIT-NEXT:    stw 7, 40(1)
+; 32BIT-NEXT:    stw 8, 44(1)
+; 32BIT-NEXT:    stw 9, 48(1)
+; 32BIT-NEXT:    stw 10, 52(1)
+; 32BIT-NEXT:    lfd 4, 40(1)
+; 32BIT-NEXT:    lfd 5, 48(1)
+; 32BIT-NEXT:    lfs 0, 0(3)
+; 32BIT-NEXT:    fadd 3, 1, 0
+; 32BIT-NEXT:    fadd 0, 2, 0
+; 32BIT-NEXT:    fadd 3, 3, 4
+; 32BIT-NEXT:    fadd 0, 0, 5
+; 32BIT-NEXT:    lfd 4, 56(1)
+; 32BIT-NEXT:    lfd 5, 64(1)
+; 32BIT-NEXT:    fadd 3, 3, 1
+; 32BIT-NEXT:    fadd 0, 0, 2
+; 32BIT-NEXT:    fadd 3, 3, 4
+; 32BIT-NEXT:    fadd 0, 0, 5
+; 32BIT-NEXT:    lfd 4, 72(1)
+; 32BIT-NEXT:    lfd 5, 80(1)
+; 32BIT-NEXT:    fadd 3, 3, 1
+; 32BIT-NEXT:    fadd 0, 0, 2
+; 32BIT-NEXT:    fadd 3, 3, 4
+; 32BIT-NEXT:    fadd 0, 0, 5
+; 32BIT-NEXT:    lfd 4, 88(1)
+; 32BIT-NEXT:    lfd 5, 96(1)
+; 32BIT-NEXT:    fadd 3, 3, 1
+; 32BIT-NEXT:    fadd 0, 0, 2
+; 32BIT-NEXT:    fadd 3, 3, 4
+; 32BIT-NEXT:    fadd 0, 0, 5
+; 32BIT-NEXT:    fadd 1, 3, 1
+; 32BIT-NEXT:    fadd 0, 0, 2
+; 32BIT-NEXT:    lfd 2, 104(1)
+; 32BIT-NEXT:    lfd 3, 112(1)
+; 32BIT-NEXT:    fadd 1, 1, 2
+; 32BIT-NEXT:    fadd 2, 0, 3
+; 32BIT-NEXT:    blr
+entry:
+  %arg = alloca i8*, align 4
+  %0 = bitcast i8** %arg to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0)
+  call void @llvm.va_start(i8* nonnull %0)
+  %arg.promoted = load i8*, i8** %arg, align 4
+  %add.r = fadd double %x.coerce0, 0.000000e+00
+  %add.i = fadd double %x.coerce1, 0.000000e+00
+  %argp.next = getelementptr inbounds i8, i8* %arg.promoted, i32 16
+  %.realp = bitcast i8* %arg.promoted to double*
+  %.real = load double, double* %.realp, align 4
+  %.imagp = getelementptr inbounds i8, i8* %arg.promoted, i32 8
+  %1 = bitcast i8* %.imagp to double*
+  %.imag = load double, double* %1, align 4
+  %add.r4 = fadd double %add.r, %.real
+  %add.i5 = fadd double %add.i, %.imag
+  %add.r.1 = fadd double %add.r4, %x.coerce0
+  %add.i.1 = fadd double %add.i5, %x.coerce1
+  %argp.next.1 = getelementptr inbounds i8, i8* %arg.promoted, i32 32
+  %.realp.1 = bitcast i8* %argp.next to double*
+  %.real.1 = load double, double* %.realp.1, align 4
+  %.imagp.1 = getelementptr inbounds i8, i8* %arg.promoted, i32 24
+  %2 = bitcast i8* %.imagp.1 to double*
+  %.imag.1 = load double, double* %2, align 4
+  %add.r4.1 = fadd double %add.r.1, %.real.1
+  %add.i5.1 = fadd double %add.i.1, %.imag.1
+  %add.r.2 = fadd double %add.r4.1, %x.coerce0
+  %add.i.2 = fadd double %add.i5.1, %x.coerce1
+  %argp.next.2 = getelementptr inbounds i8, i8* %arg.promoted, i32 48
+  %.realp.2 = bitcast i8* %argp.next.1 to double*
+  %.real.2 = load double, double* %.realp.2, align 4
+  %.imagp.2 = getelementptr inbounds i8, i8* %arg.promoted, i32 40
+  %3 = bitcast i8* %.imagp.2 to double*
+  %.imag.2 = load double, double* %3, align 4
+  %add.r4.2 = fadd double %add.r.2, %.real.2
+  %add.i5.2 = fadd double %add.i.2, %.imag.2
+  %add.r.3 = fadd double %add.r4.2, %x.coerce0
+  %add.i.3 = fadd double %add.i5.2, %x.coerce1
+  %argp.next.3 = getelementptr inbounds i8, i8* %arg.promoted, i32 64
+  %.realp.3 = bitcast i8* %argp.next.2 to double*
+  %.real.3 = load double, double* %.realp.3, align 4
+  %.imagp.3 = getelementptr inbounds i8, i8* %arg.promoted, i32 56
+  %4 = bitcast i8* %.imagp.3 to double*
+  %.imag.3 = load double, double* %4, align 4
+  %add.r4.3 = fadd double %add.r.3, %.real.3
+  %add.i5.3 = fadd double %add.i.3, %.imag.3
+  %add.r.4 = fadd double %add.r4.3, %x.coerce0
+  %add.i.4 = fadd double %add.i5.3, %x.coerce1
+  %.realp.4 = bitcast i8* %argp.next.3 to double*
+  %.real.4 = load double, double* %.realp.4, align 4
+  %.imagp.4 = getelementptr inbounds i8, i8* %arg.promoted, i32 72
+  %5 = bitcast i8* %.imagp.4 to double*
+  %.imag.4 = load double, double* %5, align 4
+  %add.r4.4 = fadd double %add.r.4, %.real.4
+  %add.i5.4 = fadd double %add.i.4, %.imag.4
+  %scevgep = getelementptr i8, i8* %arg.promoted, i32 80
+  store i8* %scevgep, i8** %arg, align 4
+  call void @llvm.va_end(i8* nonnull %0)
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0)
+  %.fca.0.insert = insertvalue { double, double } undef, double %add.r4.4, 0
+  %.fca.1.insert = insertvalue { double, double } %.fca.0.insert, double %add.i5.4, 1
+  ret { double, double } %.fca.1.insert
+}
+
+; Function Attrs: nofree nosync nounwind
+define void @bar1() local_unnamed_addr {
+; 32BIT-LABEL: bar1:
+; 32BIT:       # %bb.0: # %entry
+; 32BIT-NEXT:    mflr 0
+; 32BIT-NEXT:    stw 0, 8(1)
+; 32BIT-NEXT:    stwu 1, -128(1)
+; 32BIT-NEXT:    lwz 3, L..C3(2) # @cdbl
+; 32BIT-NEXT:    lwz 7, L..C4(2) # @cdbl1
+; 32BIT-NEXT:    lwz 9, L..C5(2) # @cdbl2
+; 32BIT-NEXT:    lwz 10, L..C6(2) # @cdbl3
+; 32BIT-NEXT:    lfd 1, 0(3)
+; 32BIT-NEXT:    stfd 1, 120(1)
+; 32BIT-NEXT:    lfd 2, 8(3)
+; 32BIT-NEXT:    lwz 3, 120(1)
+; 32BIT-NEXT:    lwz 4, 124(1)
+; 32BIT-NEXT:    stfd 2, 112(1)
+; 32BIT-NEXT:    lfd 3, 0(7)
+; 32BIT-NEXT:    lwz 5, 112(1)
+; 32BIT-NEXT:    lwz 6, 116(1)
+; 32BIT-NEXT:    stfd 3, 104(1)
+; 32BIT-NEXT:    lfd 4, 8(7)
+; 32BIT-NEXT:    lwz 7, 104(1)
+; 32BIT-NEXT:    lwz 8, 108(1)
+; 32BIT-NEXT:    stfd 4, 96(1)
+; 32BIT-NEXT:    lfd 5, 0(9)
+; 32BIT-NEXT:    lfd 6, 8(9)
+; 32BIT-NEXT:    lfd 7, 0(10)
+; 32BIT-NEXT:    lfd 8, 8(10)
+; 32BIT-NEXT:    lwz 9, 96(1)
+; 32BIT-NEXT:    lwz 10, 100(1)
+; 32BIT-NEXT:    stfd 6, 64(1)
+; 32BIT-NEXT:    stfd 5, 56(1)
+; 32BIT-NEXT:    stfd 8, 80(1)
+; 32BIT-NEXT:    stfd 7, 72(1)
+; 32BIT-NEXT:    bl .foo1
+; 32BIT-NEXT:    nop
+; 32BIT-NEXT:    addi 1, 1, 128
+; 32BIT-NEXT:    lwz 0, 8(1)
+; 32BIT-NEXT:    mtlr 0
+; 32BIT-NEXT:    blr
+entry:
+  %cdbl.real = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cdbl, i32 0, i32 0), align 8
+  %cdbl.imag = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cdbl, i32 0, i32 1), align 8
+  %cdbl1.real = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cdbl1, i32 0, i32 0), align 8
+  %cdbl1.imag = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cdbl1, i32 0, i32 1), align 8
+  %cdbl2.real = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cdbl2, i32 0, i32 0), align 8
+  %cdbl2.imag = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cdbl2, i32 0, i32 1), align 8
+  %cdbl3.real = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cdbl3, i32 0, i32 0), align 8
+  %cdbl3.imag = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cdbl3, i32 0, i32 1), align 8
+  %call = tail call { double, double } (double, double, ...) @foo1(double %cdbl.real, double %cdbl.imag, double %cdbl1.real, double %cdbl1.imag, double %cdbl2.real, double %cdbl2.imag, double %cdbl3.real, double %cdbl3.imag)
+  ret void
+}
+
+; Function Attrs: nofree nosync nounwind
+define void @bar2() local_unnamed_addr {
+; 32BIT-LABEL: bar2:
+; 32BIT:       # %bb.0: # %entry
+; 32BIT-NEXT:    mflr 0
+; 32BIT-NEXT:    stw 0, 8(1)
+; 32BIT-NEXT:    stwu 1, -96(1)
+; 32BIT-NEXT:    lwz 3, L..C7(2) # @cflt
+; 32BIT-NEXT:    lwz 5, L..C8(2) # @cflt1
+; 32BIT-NEXT:    lwz 7, L..C9(2) # @cflt2
+; 32BIT-NEXT:    lwz 9, L..C10(2) # @cflt3
+; 32BIT-NEXT:    lfs 1, 0(3)
+; 32BIT-NEXT:    stfs 1, 64(1)
+; 32BIT-NEXT:    lfs 2, 4(3)
+; 32BIT-NEXT:    lwz 3, 64(1)
+; 32BIT-NEXT:    stfs 2, 80(1)
+; 32BIT-NEXT:    lfs 3, 0(5)
+; 32BIT-NEXT:    lwz 4, 80(1)
+; 32BIT-NEXT:    stfs 3, 68(1)
+; 32BIT-NEXT:    lfs 4, 4(5)
+; 32BIT-NEXT:    lwz 5, 68(1)
+; 32BIT-NEXT:    stfs 4, 84(1)
+; 32BIT-NEXT:    lfs 5, 0(7)
+; 32BIT-NEXT:    lwz 6, 84(1)
+; 32BIT-NEXT:    stfs 5, 72(1)
+; 32BIT-NEXT:    lfs 6, 4(7)
+; 32BIT-NEXT:    lwz 7, 72(1)
+; 32BIT-NEXT:    stfs 6, 88(1)
+; 32BIT-NEXT:    lfs 7, 0(9)
+; 32BIT-NEXT:    lwz 8, 88(1)
+; 32BIT-NEXT:    stfs 7, 76(1)
+; 32BIT-NEXT:    lfs 8, 4(9)
+; 32BIT-NEXT:    lwz 9, 76(1)
+; 32BIT-NEXT:    stfs 8, 92(1)
+; 32BIT-NEXT:    lwz 10, 92(1)
+; 32BIT-NEXT:    bl .foo2
+; 32BIT-NEXT:    nop
+; 32BIT-NEXT:    addi 1, 1, 96
+; 32BIT-NEXT:    lwz 0, 8(1)
+; 32BIT-NEXT:    mtlr 0
+; 32BIT-NEXT:    blr
+entry:
+  %cflt.real = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @cflt, i32 0, i32 0), align 4
+  %cflt.imag = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @cflt, i32 0, i32 1), align 4
+  %cflt1.real = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @cflt1, i32 0, i32 0), align 4
+  %cflt1.imag = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @cflt1, i32 0, i32 1), align 4
+  %cflt2.real = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @cflt2, i32 0, i32 0), align 4
+  %cflt2.imag = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @cflt2, i32 0, i32 1), align 4
+  %cflt3.real = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @cflt3, i32 0, i32 0), align 4
+  %cflt3.imag = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @cflt3, i32 0, i32 1), align 4
+  %call = tail call { float, float } (float, float, ...) @foo2(float %cflt.real, float %cflt.imag, float %cflt1.real, float %cflt1.imag, float %cflt2.real, float %cflt2.imag, float %cflt3.real, float %cflt3.imag)
+  ret void
+}
+
+; Function Attrs: nofree nosync nounwind
+define void @bar3() local_unnamed_addr {
+; 32BIT-LABEL: bar3:
+; 32BIT:       # %bb.0: # %entry
+; 32BIT-NEXT:    mflr 0
+; 32BIT-NEXT:    stw 0, 8(1)
+; 32BIT-NEXT:    stwu 1, -128(1)
+; 32BIT-NEXT:    lwz 3, L..C11(2) # @cldbl
+; 32BIT-NEXT:    lwz 7, L..C12(2) # @cldbl1
+; 32BIT-NEXT:    lwz 9, L..C13(2) # @cldbl2
+; 32BIT-NEXT:    lwz 10, L..C14(2) # @cldbl3
+; 32BIT-NEXT:    lfd 1, 0(3)
+; 32BIT-NEXT:    stfd 1, 120(1)
+; 32BIT-NEXT:    lfd 2, 8(3)
+; 32BIT-NEXT:    lwz 3, 120(1)
+; 32BIT-NEXT:    lwz 4, 124(1)
+; 32BIT-NEXT:    stfd 2, 112(1)
+; 32BIT-NEXT:    lfd 3, 0(7)
+; 32BIT-NEXT:    lwz 5, 112(1)
+; 32BIT-NEXT:    lwz 6, 116(1)
+; 32BIT-NEXT:    stfd 3, 104(1)
+; 32BIT-NEXT:    lfd 4, 8(7)
+; 32BIT-NEXT:    lwz 7, 104(1)
+; 32BIT-NEXT:    lwz 8, 108(1)
+; 32BIT-NEXT:    stfd 4, 96(1)
+; 32BIT-NEXT:    lfd 5, 0(9)
+; 32BIT-NEXT:    lfd 6, 8(9)
+; 32BIT-NEXT:    lfd 7, 0(10)
+; 32BIT-NEXT:    lfd 8, 8(10)
+; 32BIT-NEXT:    lwz 9, 96(1)
+; 32BIT-NEXT:    lwz 10, 100(1)
+; 32BIT-NEXT:    stfd 6, 64(1)
+; 32BIT-NEXT:    stfd 5, 56(1)
+; 32BIT-NEXT:    stfd 8, 80(1)
+; 32BIT-NEXT:    stfd 7, 72(1)
+; 32BIT-NEXT:    bl .foo3
+; 32BIT-NEXT:    nop
+; 32BIT-NEXT:    addi 1, 1, 128
+; 32BIT-NEXT:    lwz 0, 8(1)
+; 32BIT-NEXT:    mtlr 0
+; 32BIT-NEXT:    blr
+entry:
+  %cldbl.real = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cldbl, i32 0, i32 0), align 8
+  %cldbl.imag = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cldbl, i32 0, i32 1), align 8
+  %cldbl1.real = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cldbl1, i32 0, i32 0), align 8
+  %cldbl1.imag = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cldbl1, i32 0, i32 1), align 8
+  %cldbl2.real = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cldbl2, i32 0, i32 0), align 8
+  %cldbl2.imag = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cldbl2, i32 0, i32 1), align 8
+  %cldbl3.real = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cldbl3, i32 0, i32 0), align 8
+  %cldbl3.imag = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @cldbl3, i32 0, i32 1), align 8
+  %call = tail call { double, double } (double, double, ...) @foo3(double %cldbl.real, double %cldbl.imag, double %cldbl1.real, double %cldbl1.imag, double %cldbl2.real, double %cldbl2.imag, double %cldbl3.real, double %cldbl3.imag)
+  ret void
+}
Index: clang/test/CodeGen/ppc64-varargs-complex.c
===================================================================
--- clang/test/CodeGen/ppc64-varargs-complex.c
+++ clang/test/CodeGen/ppc64-varargs-complex.c
@@ -1,5 +1,6 @@
 // REQUIRES: powerpc-registered-target
 // RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple powerpc64-ibm-aix-xcoff -emit-llvm -o - %s | FileCheck %s
 
 #include <stdarg.h>
 
Index: clang/test/CodeGen/aix32-complex-varargs.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aix32-complex-varargs.c
@@ -0,0 +1,66 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_cc1 -triple powerpc-ibm-aix-xcoff -emit-llvm -o - %s | FileCheck %s
+
+#include <stdarg.h>
+
+void testva (int n, ...)
+{
+  va_list ap;
+
+  _Complex int i   = va_arg(ap, _Complex int);
+// CHECK:  %[[VAR40:[A-Za-z0-9.]+]] = load i8*, i8** %[[VAR100:[A-Za-z0-9.]+]]
+// CHECK-NEXT:  %[[VAR41:[A-Za-z0-9.]+]] = getelementptr inbounds i8, i8* %[[VAR40]]
+// CHECK-NEXT:  store i8* %[[VAR41]], i8** %[[VAR100]], align 4
+// CHECK-NEXT:  %[[VAR4:[A-Za-z0-9.]+]] = bitcast i8* %[[VAR40]] to { i32, i32 }*
+// CHECK-NEXT:  %[[VAR6:[A-Za-z0-9.]+]] = getelementptr inbounds { i32, i32 }, { i32, i32 }* %[[VAR4]], i32 0, i32 0
+// CHECK-NEXT:  %[[VAR7:[A-Za-z0-9.]+]] = load i32, i32* %[[VAR6]]
+// CHECK-NEXT:  %[[VAR8:[A-Za-z0-9.]+]] = getelementptr inbounds { i32, i32 }, { i32, i32 }* %[[VAR4]], i32 0, i32 1
+// CHECK-NEXT:  %[[VAR9:[A-Za-z0-9.]+]] = load i32, i32* %[[VAR8]]
+// CHECK-NEXT:  %[[VAR10:[A-Za-z0-9.]+]] = getelementptr inbounds { i32, i32 }, { i32, i32 }* %[[VARINT:[A-Za-z0-9.]+]], i32 0, i32 0
+// CHECK-NEXT:  %[[VAR11:[A-Za-z0-9.]+]] = getelementptr inbounds { i32, i32 }, { i32, i32 }* %[[VARINT]], i32 0, i32 1
+// CHECK-NEXT:  store i32 %[[VAR7]], i32* %[[VAR10]]
+// CHECK-NEXT:  store i32 %[[VAR9]], i32* %[[VAR11]]
+
+  _Complex short s = va_arg(ap, _Complex short);
+// CHECK:  %[[VAR50:[A-Za-z0-9.]+]] = load i8*, i8** %[[VAR100:[A-Za-z0-9.]+]]
+// CHECK-NEXT:  %[[VAR51:[A-Za-z0-9.]+]] = getelementptr inbounds i8, i8* %[[VAR50]]
+// CHECK-NEXT:  store i8* %[[VAR51]], i8** %[[VAR100]], align 4
+// CHECK-NEXT:  %[[VAR12:[A-Za-z0-9.]+]] = getelementptr inbounds i8, i8* %[[VAR50]], i32 2
+// CHECK-NEXT:  %[[VAR13:[A-Za-z0-9.]+]] = getelementptr inbounds i8, i8* %[[VAR50]], i32 6
+// CHECK-NEXT:  %[[VAR14:[A-Za-z0-9.]+]] = bitcast i8* %[[VAR12]] to i16*
+// CHECK-NEXT:  %[[VAR15:[A-Za-z0-9.]+]] = bitcast i8* %[[VAR13]] to i16*
+// CHECK-NEXT:  %[[VAR16:[A-Za-z0-9.]+]] = load i16, i16* %[[VAR14]], align 2
+// CHECK-NEXT:  %[[VAR17:[A-Za-z0-9.]+]] = load i16, i16* %[[VAR15]], align 2
+// CHECK-NEXT:  %[[VAR18:[A-Za-z0-9.]+]] = getelementptr inbounds { i16, i16 }, { i16, i16 }* %[[VAR19:[A-Za-z0-9.]+]], i32 0, i32 0
+// CHECK-NEXT:  %[[VAR20:[A-Za-z0-9.]+]] = getelementptr inbounds { i16, i16 }, { i16, i16 }* %[[VAR19]], i32 0, i32 1
+// CHECK-NEXT:  store i16 %[[VAR16]], i16* %[[VAR18]]
+// CHECK-NEXT:  store i16 %[[VAR17]], i16* %[[VAR20]]
+
+
+  _Complex char c  = va_arg(ap, _Complex char);
+// CHECK:  %[[VAR60:[A-Za-z0-9.]+]] = load i8*, i8** %[[VAR100:[A-Za-z0-9.]+]]
+// CHECK-NEXT:  %[[VAR61:[A-Za-z0-9.]+]] = getelementptr inbounds i8, i8* %[[VAR60]]
+// CHECK-NEXT:  store i8* %[[VAR61]], i8** %[[VAR100]], align 4
+// CHECK-NEXT:  %[[VAR21:[A-Za-z0-9.]+]] = getelementptr inbounds i8, i8* %[[VAR60]], i32 3
+// CHECK-NEXT:  %[[VAR22:[A-Za-z0-9.]+]] = getelementptr inbounds i8, i8* %[[VAR60]], i32 7
+// CHECK-NEXT:  %[[VAR23:[A-Za-z0-9.]+]] = load i8, i8* %[[VAR21]]
+// CHECK-NEXT:  %[[VAR24:[A-Za-z0-9.]+]] = load i8, i8* %[[VAR22]]
+// CHECK-NEXT:  %[[VAR25:[A-Za-z0-9.]+]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %[[VAR26:[A-Za-z0-9.]+]], i32 0, i32 0
+// CHECK-NEXT:  %[[VAR27:[A-Za-z0-9.]+]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %[[VAR26]], i32 0, i32 1
+// CHECK-NEXT:  store i8 %[[VAR23]], i8* %[[VAR25]]
+// CHECK-NEXT:  store i8 %[[VAR24]], i8* %[[VAR27]]
+
+
+  _Complex float f = va_arg(ap, _Complex float);
+// CHECK:  %[[VAR70:[A-Za-z0-9.]+]] = getelementptr inbounds i8, i8* %[[VAR71:[A-Za-z0-9.]+]], i32 8
+// CHECK-NEXT:  store i8* %[[VAR70]], i8** %[[VAR100:[A-Za-z0-9.]+]]
+// CHECK-NEXT:  %[[VAR28:[A-Za-z0-9.]+]] = bitcast i8* %[[VAR71]] to { float, float }*
+// CHECK-NEXT:  %[[VAR29:[A-Za-z0-9.]+]] = getelementptr inbounds { float, float }, { float, float }* %[[VAR28]], i32 0, i32 0
+// CHECK-NEXT:  %[[VAR30:[A-Za-z0-9.]+]] = load float, float* %[[VAR29]]
+// CHECK-NEXT:  %[[VAR31:[A-Za-z0-9.]+]] = getelementptr inbounds { float, float }, { float, float }* %[[VAR28]], i32 0, i32 1
+// CHECK-NEXT:  %[[VAR32:[A-Za-z0-9.]+]] = load float, float* %[[VAR31]]
+// CHECK-NEXT:  %[[VAR33:[A-Za-z0-9.]+]] = getelementptr inbounds { float, float }, { float, float }* %f, i32 0, i32 0
+// CHECK-NEXT:  %[[VAR34:[A-Za-z0-9.]+]] = getelementptr inbounds { float, float }, { float, float }* %f, i32 0, i32 1
+// CHECK-NEXT:  store float %[[VAR30]], float* %[[VAR33]]
+// CHECK-NEXT:  store float %[[VAR32]], float* %[[VAR34]]
+}
Index: clang/lib/CodeGen/TargetInfo.cpp
===================================================================
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -4569,14 +4569,45 @@
 
 Address AIXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                               QualType Ty) const {
-  if (Ty->isAnyComplexType())
-    llvm::report_fatal_error("complex type is not supported on AIX yet");
-
   auto TypeInfo = getContext().getTypeInfoInChars(Ty);
   TypeInfo.Align = getParamTypeAlignment(Ty);
 
   CharUnits SlotSize = CharUnits::fromQuantity(PtrByteSize);
 
+  // If we have a complex type and the element type is smaller than the
+  // slot size, the ABI right-adjusts the real and imaginary parts in
+  // separate slots (words on AIX32, doublewords on AIX64). However, Clang
+  // expects a pointer to a structure with the two parts packed tightly,
+  // so load the real and imaginary parts from their adjusted positions
+  // relative to the va_list pointer and store them into a temporary
+  // structure. This mirrors the PPC64 ELF handling.
+  if (const ComplexType *CTy = Ty->getAs<ComplexType>()) {
+    CharUnits EltSize = TypeInfo.Width / 2;
+    if (EltSize < SlotSize) {
+      Address Addr =
+          emitVoidPtrDirectVAArg(CGF, VAListAddr, CGF.Int8Ty, SlotSize * 2,
+                                 SlotSize, SlotSize, /*AllowHigher*/ true);
+
+      Address RealAddr = Addr;
+      Address ImagAddr = RealAddr;
+      RealAddr =
+          CGF.Builder.CreateConstInBoundsByteGEP(RealAddr, SlotSize - EltSize);
+      ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(ImagAddr,
+                                                        2 * SlotSize - EltSize);
+
+      llvm::Type *EltTy = CGF.ConvertTypeForMem(CTy->getElementType());
+      RealAddr = CGF.Builder.CreateElementBitCast(RealAddr, EltTy);
+      ImagAddr = CGF.Builder.CreateElementBitCast(ImagAddr, EltTy);
+      llvm::Value *Real = CGF.Builder.CreateLoad(RealAddr, ".vareal");
+      llvm::Value *Imag = CGF.Builder.CreateLoad(ImagAddr, ".vaimag");
+
+      Address Temp = CGF.CreateMemTemp(Ty, "vacplx");
+      CGF.EmitStoreOfComplex({Real, Imag}, CGF.MakeAddrLValue(Temp, Ty),
+                             /*init*/ true);
+      return Temp;
+    }
+  }
+  // Otherwise, just use the general rule.
   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false, TypeInfo,
                           SlotSize, /*AllowHigher*/ true);
 }
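
For reference, the loads in the new path follow directly from the slot
arithmetic: with 4-byte slots on AIX32 and a _Complex short (EltSize = 2),
the real part is read at SlotSize - EltSize = 2 and the imaginary part at
2 * SlotSize - EltSize = 6, matching the getelementptr offsets checked in
aix32-complex-varargs.c.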