pengfei updated this revision to Diff 484838. pengfei marked 2 inline comments as done. pengfei added a comment.
Address review comments. Thanks!

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D140467/new/

https://reviews.llvm.org/D140467

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-x86-reduce.c

Index: clang/test/CodeGen/builtins-x86-reduce.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/builtins-x86-reduce.c
@@ -0,0 +1,37 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 %s -triple x86_64-unknown-unknown -target-feature +avx512f -emit-llvm -o - | FileCheck %s
+
+typedef double double8 __attribute__((ext_vector_type(8)));
+
+// CHECK-LABEL: @foo(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <8 x double>, align 64
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
+// CHECK-NEXT:    store <8 x double> [[A:%.*]], ptr [[A_ADDR]], align 64
+// CHECK-NEXT:    store double [[B:%.*]], ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load <8 x double>, ptr [[A_ADDR]], align 64
+// CHECK-NEXT:    [[TMP1:%.*]] = call reassoc double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = load double, ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[ADD:%.*]] = fadd double [[TMP1]], [[TMP2]]
+// CHECK-NEXT:    ret double [[ADD]]
+//
+double foo(double8 a, double b) {
+  return __builtin_ia32_reduce_fadd_pd512(0.0, a) + b;
+}
+
+#pragma clang fp reassociate(on)
+// CHECK-LABEL: @bar(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <8 x double>, align 64
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
+// CHECK-NEXT:    store <8 x double> [[A:%.*]], ptr [[A_ADDR]], align 64
+// CHECK-NEXT:    store double [[B:%.*]], ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load <8 x double>, ptr [[A_ADDR]], align 64
+// CHECK-NEXT:    [[TMP1:%.*]] = call reassoc double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = load double, ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[ADD:%.*]] = fadd reassoc double [[TMP1]], [[TMP2]]
+// CHECK-NEXT:    ret double [[ADD]]
+//
+double bar(double8 a, double b) {
+  return __builtin_ia32_reduce_fadd_pd512(0.0, a) + b;
+}
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -14737,6 +14737,7 @@
   case X86::BI__builtin_ia32_reduce_fadd_ph128: {
     Function *F =
         CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
+    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
     Builder.getFastMathFlags().setAllowReassoc();
     return Builder.CreateCall(F, {Ops[0], Ops[1]});
   }
@@ -14747,6 +14748,7 @@
   case X86::BI__builtin_ia32_reduce_fmul_ph128: {
     Function *F =
         CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
+    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
     Builder.getFastMathFlags().setAllowReassoc();
     return Builder.CreateCall(F, {Ops[0], Ops[1]});
   }
@@ -14757,6 +14759,7 @@
   case X86::BI__builtin_ia32_reduce_fmax_ph128: {
     Function *F =
         CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
+    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
     Builder.getFastMathFlags().setNoNaNs();
     return Builder.CreateCall(F, {Ops[0]});
   }
@@ -14767,6 +14770,7 @@
   case X86::BI__builtin_ia32_reduce_fmin_ph128: {
     Function *F =
         CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
+    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
     Builder.getFastMathFlags().setNoNaNs();
     return Builder.CreateCall(F, {Ops[0]});
   }
Index: clang/test/CodeGen/builtins-x86-reduce.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/builtins-x86-reduce.c
@@ -0,0 +1,37 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 %s -triple x86_64-unknown-unknown -target-feature +avx512f -emit-llvm -o - | FileCheck %s
+
+typedef double double8 __attribute__((ext_vector_type(8)));
+
+// CHECK-LABEL: @foo(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <8 x double>, align 64
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
+// CHECK-NEXT:    store <8 x double> [[A:%.*]], ptr [[A_ADDR]], align 64
+// CHECK-NEXT:    store double [[B:%.*]], ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load <8 x double>, ptr [[A_ADDR]], align 64
+// CHECK-NEXT:    [[TMP1:%.*]] = call reassoc double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = load double, ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[ADD:%.*]] = fadd double [[TMP1]], [[TMP2]]
+// CHECK-NEXT:    ret double [[ADD]]
+//
+double foo(double8 a, double b) {
+  return __builtin_ia32_reduce_fadd_pd512(0.0, a) + b;
+}
+
+#pragma clang fp reassociate(on)
+// CHECK-LABEL: @bar(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <8 x double>, align 64
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
+// CHECK-NEXT:    store <8 x double> [[A:%.*]], ptr [[A_ADDR]], align 64
+// CHECK-NEXT:    store double [[B:%.*]], ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load <8 x double>, ptr [[A_ADDR]], align 64
+// CHECK-NEXT:    [[TMP1:%.*]] = call reassoc double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = load double, ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[ADD:%.*]] = fadd reassoc double [[TMP1]], [[TMP2]]
+// CHECK-NEXT:    ret double [[ADD]]
+//
+double bar(double8 a, double b) {
+  return __builtin_ia32_reduce_fadd_pd512(0.0, a) + b;
+}
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -14737,6 +14737,7 @@
   case X86::BI__builtin_ia32_reduce_fadd_ph128: {
     Function *F =
         CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
+    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
     Builder.getFastMathFlags().setAllowReassoc();
     return Builder.CreateCall(F, {Ops[0], Ops[1]});
   }
@@ -14747,6 +14748,7 @@
   case X86::BI__builtin_ia32_reduce_fmul_ph128: {
     Function *F =
         CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
+    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
     Builder.getFastMathFlags().setAllowReassoc();
     return Builder.CreateCall(F, {Ops[0], Ops[1]});
   }
@@ -14757,6 +14759,7 @@
   case X86::BI__builtin_ia32_reduce_fmax_ph128: {
     Function *F =
         CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
+    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
     Builder.getFastMathFlags().setNoNaNs();
     return Builder.CreateCall(F, {Ops[0]});
   }
@@ -14767,6 +14770,7 @@
   case X86::BI__builtin_ia32_reduce_fmin_ph128: {
     Function *F =
         CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
+    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
     Builder.getFastMathFlags().setNoNaNs();
     return Builder.CreateCall(F, {Ops[0]});
   }
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits