biplmish created this revision.
biplmish added reviewers: nemanjai, lei, power-llvm-team.
biplmish added projects: clang, LLVM, PowerPC.
Herald added subscribers: llvm-commits, cfe-commits, shchenz, hiraditya.
This patch implements builtins for the following prototypes:

vector unsigned char vec_insertl (unsigned char, vector unsigned char, unsigned int);
vector unsigned short vec_insertl (unsigned short, vector unsigned short, unsigned int);
vector unsigned int vec_insertl (unsigned int, vector unsigned int, unsigned int);
vector unsigned long long vec_insertl (unsigned long long, vector unsigned long long, unsigned int);
vector unsigned char vec_insertl (vector unsigned char, vector unsigned char, unsigned int);
vector unsigned short vec_insertl (vector unsigned short, vector unsigned short, unsigned int);
vector unsigned int vec_insertl (vector unsigned int, vector unsigned int, unsigned int);

vector unsigned char vec_inserth (unsigned char, vector unsigned char, unsigned int);
vector unsigned short vec_inserth (unsigned short, vector unsigned short, unsigned int);
vector unsigned int vec_inserth (unsigned int, vector unsigned int, unsigned int);
vector unsigned long long vec_inserth (unsigned long long, vector unsigned long long, unsigned int);
vector unsigned char vec_inserth (vector unsigned char, vector unsigned char, unsigned int);
vector unsigned short vec_inserth (vector unsigned short, vector unsigned short, unsigned int);
vector unsigned int vec_inserth (vector unsigned int, vector unsigned int, unsigned int);

vector signed int vec_replace_elt (vector signed int, signed int, const int);
vector unsigned int vec_replace_elt (vector unsigned int, unsigned int, const int);
vector float vec_replace_elt (vector float, float, const int);
vector signed long long vec_replace_elt (vector signed long long, signed long long, const int);
vector unsigned long long vec_replace_elt (vector unsigned long long, unsigned long long, const int);
vector double vec_replace_elt (vector double, double, const int);

vector unsigned char vec_replace_unaligned (vector unsigned char, signed int, const int);
vector unsigned char vec_replace_unaligned (vector unsigned char, unsigned int, const int);
vector unsigned char vec_replace_unaligned (vector unsigned char, float, const int);
vector unsigned char vec_replace_unaligned (vector unsigned char, signed long long, const int);
vector unsigned char vec_replace_unaligned (vector unsigned char, unsigned long long, const int);
vector unsigned char vec_replace_unaligned (vector unsigned char, double, const int);

vector signed char vec_sldb (vector signed char, vector signed char, const unsigned int);
vector unsigned char vec_sldb (vector unsigned char, vector unsigned char, const unsigned int);
vector signed short vec_sldb (vector signed short, vector signed short, const unsigned int);
vector unsigned short vec_sldb (vector unsigned short, vector unsigned short, const unsigned int);
vector signed int vec_sldb (vector signed int, vector signed int, const unsigned int);
vector unsigned int vec_sldb (vector unsigned int, vector unsigned int, const unsigned int);
vector signed long long vec_sldb (vector signed long long, vector signed long long, const unsigned int);
vector unsigned long long vec_sldb (vector unsigned long long, vector unsigned long long, const unsigned int);

vector signed char vec_srdb (vector signed char, vector signed char, const unsigned int);
vector unsigned char vec_srdb (vector unsigned char, vector unsigned char, const unsigned int);
vector signed short vec_srdb (vector signed short, vector signed short, const unsigned int);
vector unsigned short vec_srdb (vector unsigned short, vector unsigned short, const unsigned int);
vector signed int vec_srdb (vector signed int, vector signed int, const unsigned int);
vector unsigned int vec_srdb (vector unsigned int, vector unsigned int, const unsigned int);
vector signed long long vec_srdb (vector signed long long, vector signed long long, const unsigned int);
vector unsigned long long vec_srdb (vector unsigned long long, vector unsigned long long, const unsigned int);

vector signed int vec_splati (const signed int);
vector float vec_splati (const float);
vector double vec_splatid (const float);

vector signed int vec_splati_ins (vector signed int, const unsigned int, const signed int);
vector unsigned int vec_splati_ins (vector unsigned int, const unsigned int, const unsigned int);
vector float vec_splati_ins (vector float, const unsigned int, const float);

vector signed char vec_blendv (vector signed char, vector signed char, vector unsigned char);
vector unsigned char vec_blendv (vector unsigned char, vector unsigned char, vector unsigned char);
vector signed short vec_blendv (vector signed short, vector signed short, vector unsigned short);
vector unsigned short vec_blendv (vector unsigned short, vector unsigned short, vector unsigned short);
vector signed int vec_blendv (vector signed int, vector signed int, vector unsigned int);
vector unsigned int vec_blendv (vector unsigned int, vector unsigned int, vector unsigned int);
vector signed long long vec_blendv (vector signed long long, vector signed long long, vector unsigned long long);
vector unsigned long long vec_blendv (vector unsigned long long, vector unsigned long long, vector unsigned long long);
vector float vec_blendv (vector float, vector float, vector unsigned int);
vector double vec_blendv (vector double, vector double, vector unsigned long long);

vector signed char vec_permx (vector signed char, vector signed char, vector unsigned char, const int);
vector unsigned char vec_permx (vector unsigned char, vector unsigned char, vector unsigned char, const int);
vector signed short vec_permx (vector signed short, vector signed short, vector unsigned char, const int);
vector unsigned short vec_permx (vector unsigned short, vector unsigned short, vector unsigned char, const int);
vector signed int vec_permx (vector signed int, vector signed int, vector unsigned char, const int);
vector unsigned int vec_permx (vector unsigned int, vector unsigned int, vector unsigned char, const int);
vector signed long long vec_permx (vector signed long long, vector signed long long, vector unsigned char, const int);
vector unsigned long long vec_permx (vector unsigned long long, vector unsigned long long, vector unsigned char, const int);
vector float vec_permx (vector float, vector float, vector unsigned char, const int);
vector double vec_permx (vector double, vector double, vector unsigned char, const int);

Depends on D80758 <https://reviews.llvm.org/D80758>

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D81836

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/lib/Target/PowerPC/PPCScheduleP9.td
  llvm/test/CodeGen/PowerPC/p10-permute-ops.ll
  llvm/test/MC/Disassembler/PowerPC/p10insts.txt
  llvm/test/MC/PowerPC/p10.s
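For context, a minimal usage sketch of the insert/replace family, assuming a compiler built with this patch and -mcpu=pwr10 (the function and variable names are illustrative, not part of the patch; exact element numbering is endian-dependent):

  #include <altivec.h>

  vector unsigned int demo_insert(vector unsigned int v, unsigned int s,
                                  unsigned int pos) {
    // Inserts the scalar s into v at the byte offset pos, counting from
    // the "low" end of the register; vec_inserth counts from the "high"
    // end instead. The offset may be a runtime value.
    return vec_insertl(s, v, pos);
  }

  vector unsigned int demo_replace(vector unsigned int v, unsigned int s) {
    // Replaces element 2 of v with s; here the index must be a
    // compile-time constant, since it becomes the immediate of vinsw.
    return vec_replace_elt(v, s, 2);
  }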
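The shift-double builtins operate on the 256-bit concatenation of their two vector operands. A hedged sketch, under the same assumptions as above:

  #include <altivec.h>

  vector unsigned char demo_sldb(vector unsigned char a,
                                 vector unsigned char b) {
    // Shifts the concatenation a:b left by 3 bits and returns the high
    // 128 bits (maps to vsldbi); the count must be a constant in 0-7.
    // vec_srdb is the right-shift counterpart (vsrdbi).
    return vec_sldb(a, b, 3);
  }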
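And one for the blend/splat family; the per-element selection rule below paraphrases the ISA 3.1 description of xxblendv and may gloss over corner cases:

  #include <altivec.h>

  vector unsigned int demo_blend(vector unsigned int a, vector unsigned int b,
                                 vector unsigned int mask) {
    // For each element, selects from b where the sign bit of the
    // corresponding mask element is set, otherwise from a (xxblendvw).
    return vec_blendv(a, b, mask);
  }

  vector signed int demo_splat(void) {
    // Splats the constant 7 to all four elements (xxspltiw); the
    // operand must be a compile-time constant.
    return vec_splati(7);
  }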
Index: llvm/test/MC/PowerPC/p10.s =================================================================== --- /dev/null +++ llvm/test/MC/PowerPC/p10.s @@ -0,0 +1,132 @@ +# RUN: llvm-mc -triple powerpc64-unknown-linux-gnu --show-encoding %s | \ +# RUN: FileCheck -check-prefix=CHECK-BE %s +# RUN: llvm-mc -triple powerpc64le-unknown-linux-gnu --show-encoding %s | \ +# RUN: FileCheck -check-prefix=CHECK-LE %s + +# CHECK-BE: vinsbvlx 1, 3, 5 # encoding: [0x10,0x23,0x28,0x0f] +# CHECK-LE: vinsbvlx 1, 3, 5 # encoding: [0x0f,0x28,0x23,0x10] + vinsbvlx 1, 3, 5 +# CHECK-BE: vinsbvrx 1, 3, 5 # encoding: [0x10,0x23,0x29,0x0f] +# CHECK-LE: vinsbvrx 1, 3, 5 # encoding: [0x0f,0x29,0x23,0x10] + vinsbvrx 1, 3, 5 +# CHECK-BE: vinshvlx 1, 3, 5 # encoding: [0x10,0x23,0x28,0x4f] +# CHECK-LE: vinshvlx 1, 3, 5 # encoding: [0x4f,0x28,0x23,0x10] + vinshvlx 1, 3, 5 +# CHECK-BE: vinshvrx 1, 3, 5 # encoding: [0x10,0x23,0x29,0x4f] +# CHECK-LE: vinshvrx 1, 3, 5 # encoding: [0x4f,0x29,0x23,0x10] + vinshvrx 1, 3, 5 +# CHECK-BE: vinswvlx 1, 3, 5 # encoding: [0x10,0x23,0x28,0x8f] +# CHECK-LE: vinswvlx 1, 3, 5 # encoding: [0x8f,0x28,0x23,0x10] + vinswvlx 1, 3, 5 +# CHECK-BE: vinswvrx 1, 3, 5 # encoding: [0x10,0x23,0x29,0x8f] +# CHECK-LE: vinswvrx 1, 3, 5 # encoding: [0x8f,0x29,0x23,0x10] + vinswvrx 1, 3, 5 +# CHECK-BE: vinsblx 1, 2, 3 # encoding: [0x10,0x22,0x1a,0x0f] +# CHECK-LE: vinsblx 1, 2, 3 # encoding: [0x0f,0x1a,0x22,0x10] + vinsblx 1, 2, 3 +# CHECK-BE: vinsbrx 1, 2, 3 # encoding: [0x10,0x22,0x1b,0x0f] +# CHECK-LE: vinsbrx 1, 2, 3 # encoding: [0x0f,0x1b,0x22,0x10] + vinsbrx 1, 2, 3 +# CHECK-BE: vinshlx 1, 2, 3 # encoding: [0x10,0x22,0x1a,0x4f] +# CHECK-LE: vinshlx 1, 2, 3 # encoding: [0x4f,0x1a,0x22,0x10] + vinshlx 1, 2, 3 +# CHECK-BE: vinshrx 1, 2, 3 # encoding: [0x10,0x22,0x1b,0x4f] +# CHECK-LE: vinshrx 1, 2, 3 # encoding: [0x4f,0x1b,0x22,0x10] + vinshrx 1, 2, 3 +# CHECK-BE: vinswlx 1, 2, 3 # encoding: [0x10,0x22,0x1a,0x8f] +# CHECK-LE: vinswlx 1, 2, 3 # encoding: [0x8f,0x1a,0x22,0x10] + vinswlx 1, 2, 3 +# CHECK-BE: vinswrx 1, 2, 3 # encoding: [0x10,0x22,0x1b,0x8f] +# CHECK-LE: vinswrx 1, 2, 3 # encoding: [0x8f,0x1b,0x22,0x10] + vinswrx 1, 2, 3 +# CHECK-BE: vinsdlx 1, 2, 3 # encoding: [0x10,0x22,0x1a,0xcf] +# CHECK-LE: vinsdlx 1, 2, 3 # encoding: [0xcf,0x1a,0x22,0x10] + vinsdlx 1, 2, 3 +# CHECK-BE: vinsdrx 1, 2, 3 # encoding: [0x10,0x22,0x1b,0xcf] +# CHECK-LE: vinsdrx 1, 2, 3 # encoding: [0xcf,0x1b,0x22,0x10] + vinsdrx 1, 2, 3 +# CHECK-BE: vinsw 2, 3, 12 # encoding: [0x10,0x4c,0x18,0xcf] +# CHECK-LE: vinsw 2, 3, 12 # encoding: [0xcf,0x18,0x4c,0x10] + vinsw 2, 3, 12 +# CHECK-BE: vinsd 2, 3, 12 # encoding: [0x10,0x4c,0x19,0xcf] +# CHECK-LE: vinsd 2, 3, 12 # encoding: [0xcf,0x19,0x4c,0x10] + vinsd 2, 3, 12 +# CHECK-BE: vsldbi 2, 3, 4, 5 # encoding: [0x10,0x43,0x21,0x56] +# CHECK-LE: vsldbi 2, 3, 4, 5 # encoding: [0x56,0x21,0x43,0x10] + vsldbi 2, 3, 4, 5 +# CHECK-BE: vsrdbi 2, 3, 4, 5 # encoding: [0x10,0x43,0x23,0x56] +# CHECK-LE: vsrdbi 2, 3, 4, 5 # encoding: [0x56,0x23,0x43,0x10] + vsrdbi 2, 3, 4, 5 +# Boundary conditions of 8RR_DForm_IMM32_XT6's immediates +# CHECK-BE: xxspltiw 63, 4294901760 # encoding: [0x05,0x00,0xff,0xff, +# CHECK-BE-SAME: 0x83,0xe7,0x00,0x00] +# CHECK-LE: xxspltiw 63, 4294901760 # encoding: [0xff,0xff,0x00,0x05, +# CHECK-LE-SAME: 0x00,0x00,0xe7,0x83] + xxspltiw 63, 4294901760 +# CHECK-BE: xxspltiw 63, 65535 # encoding: [0x05,0x00,0x00,0x00, +# CHECK-BE-SAME: 0x83,0xe7,0xff,0xff] +# CHECK-LE: xxspltiw 63, 65535 # encoding: [0x00,0x00,0x00,0x05, +# CHECK-LE-SAME: 0xff,0xff,0xe7,0x83] + xxspltiw 63, 
65535 +# CHECK-BE: xxspltiw 63, 4294967295 # encoding: [0x05,0x00,0xff,0xff, +# CHECK-BE-SAME: 0x83,0xe7,0xff,0xff] +# CHECK-LE: xxspltiw 63, 4294967295 # encoding: [0xff,0xff,0x00,0x05, +# CHECK-LE-SAME: 0xff,0xff,0xe7,0x83] + xxspltiw 63, 4294967295 +# CHECK-BE: xxspltiw 63, -1 # encoding: [0x05,0x00,0xff,0xff, +# CHECK-BE-SAME: 0x83,0xe7,0xff,0xff] +# CHECK-LE: xxspltiw 63, -1 # encoding: [0xff,0xff,0x00,0x05, +# CHECK-LE-SAME: 0xff,0xff,0xe7,0x83] + xxspltiw 63, -1 +# CHECK-BE: xxspltidp 63, 4294967295 # encoding: [0x05,0x00,0xff,0xff, +# CHECK-BE-SAME: 0x83,0xe5,0xff,0xff] +# CHECK-LE: xxspltidp 63, 4294967295 # encoding: [0xff,0xff,0x00,0x05, +# CHECK-LE-SAME: 0xff,0xff,0xe5,0x83] + xxspltidp 63, 4294967295 +# Boundary conditions of 8RR_DForm_IMM32_XT6_IX's immediates +# CHECK-BE: xxsplti32dx 63, 1, 4294901760 # encoding: [0x05,0x00,0xff,0xff, +# CHECK-BE-SAME: 0x83,0xe3,0x00,0x00] +# CHECK-LE: xxsplti32dx 63, 1, 4294901760 # encoding: [0xff,0xff,0x00,0x05, +# CHECK-LE-SAME: 0x00,0x00,0xe3,0x83] + xxsplti32dx 63, 1, 4294901760 +# CHECK-BE: xxsplti32dx 63, 1, 65535 # encoding: [0x05,0x00,0x00,0x00, +# CHECK-BE-SAME: 0x83,0xe3,0xff,0xff] +# CHECK-LE: xxsplti32dx 63, 1, 65535 # encoding: [0x00,0x00,0x00,0x05, +# CHECK-LE-SAME: 0xff,0xff,0xe3,0x83] + xxsplti32dx 63, 1, 65535 +# CHECK-BE: xxsplti32dx 63, 1, 4294967295 # encoding: [0x05,0x00,0xff,0xff, +# CHECK-BE-SAME: 0x83,0xe3,0xff,0xff] +# CHECK-LE: xxsplti32dx 63, 1, 4294967295 # encoding: [0xff,0xff,0x00,0x05, +# CHECK-LE-SAME: 0xff,0xff,0xe3,0x83] + xxsplti32dx 63, 1, 4294967295 +# CHECK-BE: xxsplti32dx 63, 1, -1 # encoding: [0x05,0x00,0xff,0xff, +# CHECK-BE-SAME: 0x83,0xe3,0xff,0xff] +# CHECK-LE: xxsplti32dx 63, 1, -1 # encoding: [0xff,0xff,0x00,0x05, +# CHECK-LE-SAME: 0xff,0xff,0xe3,0x83] + xxsplti32dx 63, 1, -1 +# CHECK-BE: xxblendvb 6, 63, 21, 34 # encoding: [0x05,0x00,0x00,0x00, +# CHECK-BE-SAME: 0x84,0xdf,0xa8,0x8c] +# CHECK-LE: xxblendvb 6, 63, 21, 34 # encoding: [0x00,0x00,0x00,0x05, +# CHECK-LE-SAME: 0x8c,0xa8,0xdf,0x84] + xxblendvb 6, 63, 21, 34 +# CHECK-BE: xxblendvh 6, 63, 21, 34 # encoding: [0x05,0x00,0x00,0x00, +# CHECK-BE-SAME: 0x84,0xdf,0xa8,0x9c] +# CHECK-LE: xxblendvh 6, 63, 21, 34 # encoding: [0x00,0x00,0x00,0x05, +# CHECK-LE-SAME: 0x9c,0xa8,0xdf,0x84] + xxblendvh 6, 63, 21, 34 +# CHECK-BE: xxblendvw 6, 63, 21, 34 # encoding: [0x05,0x00,0x00,0x00, +# CHECK-BE-SAME: 0x84,0xdf,0xa8,0xac] +# CHECK-LE: xxblendvw 6, 63, 21, 34 # encoding: [0x00,0x00,0x00,0x05, +# CHECK-LE-SAME: 0xac,0xa8,0xdf,0x84] + xxblendvw 6, 63, 21, 34 +# CHECK-BE: xxblendvd 6, 63, 21, 34 # encoding: [0x05,0x00,0x00,0x00, +# CHECK-BE-SAME: 0x84,0xdf,0xa8,0xbc] +# CHECK-LE: xxblendvd 6, 63, 21, 34 # encoding: [0x00,0x00,0x00,0x05, +# CHECK-LE-SAME: 0xbc,0xa8,0xdf,0x84] + xxblendvd 6, 63, 21, 34 +# CHECK-BE: xxpermx 6, 63, 21, 34, 2 # encoding: [0x05,0x00,0x00,0x02, +# CHECK-BE-SAME: 0x88,0xdf,0xa8,0x8c] +# CHECK-LE: xxpermx 6, 63, 21, 34, 2 # encoding: [0x02,0x00,0x00,0x05, +# CHECK-LE-SAME: 0x8c,0xa8,0xdf,0x88] + xxpermx 6, 63, 21, 34, 2 + Index: llvm/test/MC/Disassembler/PowerPC/p10insts.txt =================================================================== --- /dev/null +++ llvm/test/MC/Disassembler/PowerPC/p10insts.txt @@ -0,0 +1,94 @@ +# RUN: llvm-mc --disassemble %s -triple powerpc64-unknown-linux-gnu \ +# RUN: -mcpu=pwr10 | FileCheck %s + +# CHECK: vinsbvlx 1, 3, 5 +0x10 0x23 0x28 0x0f + +# CHECK: vinsbvrx 1, 3, 5 +0x10 0x23 0x29 0x0f + +# CHECK: vinshvlx 1, 3, 5 +0x10 0x23 0x28 0x4f + +# CHECK: vinshvrx 1, 3, 5 +0x10 0x23 0x29 0x4f + +# 
CHECK: vinswvlx 1, 3, 5 +0x10 0x23 0x28 0x8f + +# CHECK: vinswvrx 1, 3, 5 +0x10 0x23 0x29 0x8f + +# CHECK: vinsblx 1, 2, 3 +0x10 0x22 0x1a 0x0f + +# CHECK: vinsbrx 1, 2, 3 +0x10 0x22 0x1b 0x0f + +# CHECK: vinshlx 1, 2, 3 +0x10 0x22 0x1a 0x4f + +# CHECK: vinshrx 1, 2, 3 +0x10 0x22 0x1b 0x4f + +# CHECK: vinswlx 1, 2, 3 +0x10 0x22 0x1a 0x8f + +# CHECK: vinswrx 1, 2, 3 +0x10 0x22 0x1b 0x8f + +# CHECK: vinsdlx 1, 2, 3 +0x10 0x22 0x1a 0xcf + +# CHECK: vinsdrx 1, 2, 3 +0x10 0x22 0x1b 0xcf + +# CHECK: vinsw 2, 3, 12 +0x10 0x4c 0x18 0xcf + +# CHECK: vinsd 2, 3, 12 +0x10 0x4c 0x19 0xcf + +# CHECK: vsldbi 2, 3, 4, 5 +0x10 0x43 0x21 0x56 + +# CHECK: vsrdbi 2, 3, 4, 5 +0x10 0x43 0x23 0x56 + +# Boundary conditions of 8RR_DForm_IMM32_XT6's immediates +# CHECK: xxspltiw 63, 4294901760 +0x05 0x00 0xff 0xff 0x83 0xe7 0x00 0x00 + +# CHECK: xxspltiw 63, 65535 +0x05 0x00 0x00 0x00 0x83 0xe7 0xff 0xff + +# CHECK: xxspltiw 63, 4294967295 +0x05 0x00 0xff 0xff 0x83 0xe7 0xff 0xff + +# CHECK: xxspltidp 63, 4294967295 +0x05 0x00 0xff 0xff 0x83 0xe5 0xff 0xff + +# Boundary conditions of 8RR_DForm_IMM32_XT6_IX's immediates +# CHECK: xxsplti32dx 63, 1, 4294901760 +0x05 0x00 0xff 0xff 0x83 0xe3 0x00 0x00 + +# CHECK: xxsplti32dx 63, 1, 65535 +0x05 0x00 0x00 0x00 0x83 0xe3 0xff 0xff + +# CHECK: xxsplti32dx 63, 1, 4294967295 +0x05 0x00 0xff 0xff 0x83 0xe3 0xff 0xff + +# CHECK: xxblendvb 6, 63, 21, 34 +0x05 0x00 0x00 0x00 0x84 0xdf 0xa8 0x8c + +# CHECK: xxblendvh 6, 63, 21, 34 +0x05 0x00 0x00 0x00 0x84 0xdf 0xa8 0x9c + +# CHECK: xxblendvw 6, 63, 21, 34 +0x05 0x00 0x00 0x00 0x84 0xdf 0xa8 0xac + +# CHECK: xxblendvd 6, 63, 21, 34 +0x05 0x00 0x00 0x00 0x84 0xdf 0xa8 0xbc + +# CHECK: xxpermx 6, 63, 21, 34, 2 +0x05 0x00 0x00 0x02 0x88 0xdf 0xa8 0x8c Index: llvm/test/CodeGen/PowerPC/p10-permute-ops.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/p10-permute-ops.ll @@ -0,0 +1,292 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -mcpu=pwr10 \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s + +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -mcpu=pwr10 \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s + +define <16 x i8> @testVINSBLX(i64 %a, i64 %b) { +; CHECK-LABEL: testVINSBLX: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsblx v2, r3, r4 +; CHECK-NEXT: blr +entry: + %0 = tail call <16 x i8> @llvm.ppc.altivec.vinsblx(i64 %a, i64 %b) + ret <16 x i8> %0 +} +declare <16 x i8> @llvm.ppc.altivec.vinsblx(i64, i64) + +define <16 x i8> @testVINSBRX(i64 %a, i64 %b) { +; CHECK-LABEL: testVINSBRX: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsbrx v2, r3, r4 +; CHECK-NEXT: blr +entry: + %0 = tail call <16 x i8> @llvm.ppc.altivec.vinsbrx(i64 %a, i64 %b) + ret <16 x i8> %0 +} +declare <16 x i8> @llvm.ppc.altivec.vinsbrx(i64, i64) + +define <8 x i16> @testVINSHLX(i64 %a, i64 %b) { +; CHECK-LABEL: testVINSHLX: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinshlx v2, r3, r4 +; CHECK-NEXT: blr +entry: + %0 = tail call <8 x i16> @llvm.ppc.altivec.vinshlx(i64 %a, i64 %b) + ret <8 x i16> %0 +} +declare <8 x i16> @llvm.ppc.altivec.vinshlx(i64, i64) + +define <8 x i16> @testVINSHRX(i64 %a, i64 %b) { +; CHECK-LABEL: testVINSHRX: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinshrx v2, r3, r4 +; CHECK-NEXT: blr +entry: + %0 = tail call <8 x i16> @llvm.ppc.altivec.vinshrx(i64 %a, i64 %b) + ret <8 x i16> %0 +} +declare <8 x 
i16> @llvm.ppc.altivec.vinshrx(i64, i64) + +define <4 x i32> @testVINSWLX(i64 %a, i64 %b) { +; CHECK-LABEL: testVINSWLX: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinswlx v2, r3, r4 +; CHECK-NEXT: blr +entry: + %0 = tail call <4 x i32> @llvm.ppc.altivec.vinswlx(i64 %a, i64 %b) + ret <4 x i32> %0 +} +declare <4 x i32> @llvm.ppc.altivec.vinswlx(i64, i64) + +define <4 x i32> @testVINSWRX(i64 %a, i64 %b) { +; CHECK-LABEL: testVINSWRX: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinswrx v2, r3, r4 +; CHECK-NEXT: blr +entry: + %0 = tail call <4 x i32> @llvm.ppc.altivec.vinswrx(i64 %a, i64 %b) + ret <4 x i32> %0 +} +declare <4 x i32> @llvm.ppc.altivec.vinswrx(i64, i64) + +define <2 x i64> @testVINSDLX(i64 %a, i64 %b) { +; CHECK-LABEL: testVINSDLX: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsdlx v2, r3, r4 +; CHECK-NEXT: blr +entry: + %0 = tail call <2 x i64> @llvm.ppc.altivec.vinsdlx(i64 %a, i64 %b) + ret <2 x i64> %0 +} +declare <2 x i64> @llvm.ppc.altivec.vinsdlx(i64, i64) + +define <2 x i64> @testVINSDRX(i64 %a, i64 %b) { +; CHECK-LABEL: testVINSDRX: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsdrx v2, r3, r4 +; CHECK-NEXT: blr +entry: + %0 = tail call <2 x i64> @llvm.ppc.altivec.vinsdrx(i64 %a, i64 %b) + ret <2 x i64> %0 +} +declare <2 x i64> @llvm.ppc.altivec.vinsdrx(i64, i64) + +define <16 x i8> @testVINSBVLX(i64 %a, <16 x i8> %b) { +; CHECK-LABEL: testVINSBVLX: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsbvlx v2, r3, v2 +; CHECK-NEXT: blr +entry: + %0 = tail call <16 x i8> @llvm.ppc.altivec.vinsbvlx(i64 %a, <16 x i8> %b) + ret <16 x i8> %0 +} +declare <16 x i8> @llvm.ppc.altivec.vinsbvlx(i64, <16 x i8>) + +define <16 x i8> @testVINSBVRX(i64 %a, <16 x i8> %b) { +; CHECK-LABEL: testVINSBVRX: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsbvrx v2, r3, v2 +; CHECK-NEXT: blr +entry: + %0 = tail call <16 x i8> @llvm.ppc.altivec.vinsbvrx(i64 %a, <16 x i8> %b) + ret <16 x i8> %0 +} +declare <16 x i8> @llvm.ppc.altivec.vinsbvrx(i64, <16 x i8>) + +define <8 x i16> @testVINSHVLX(i64 %a, <8 x i16> %b) { +; CHECK-LABEL: testVINSHVLX: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinshvlx v2, r3, v2 +; CHECK-NEXT: blr +entry: + %0 = tail call <8 x i16> @llvm.ppc.altivec.vinshvlx(i64 %a, <8 x i16> %b) + ret <8 x i16> %0 +} +declare <8 x i16> @llvm.ppc.altivec.vinshvlx(i64, <8 x i16>) + +define <8 x i16> @testVINSHVRX(i64 %a, <8 x i16> %b) { +; CHECK-LABEL: testVINSHVRX: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinshvrx v2, r3, v2 +; CHECK-NEXT: blr +entry: + %0 = tail call <8 x i16> @llvm.ppc.altivec.vinshvrx(i64 %a, <8 x i16> %b) + ret <8 x i16> %0 +} +declare <8 x i16> @llvm.ppc.altivec.vinshvrx(i64, <8 x i16>) + +define <4 x i32> @testVINSWVLX(i64 %a, <4 x i32> %b) { +; CHECK-LABEL: testVINSWVLX: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinswvlx v2, r3, v2 +; CHECK-NEXT: blr +entry: + %0 = tail call <4 x i32> @llvm.ppc.altivec.vinswvlx(i64 %a, <4 x i32> %b) + ret <4 x i32> %0 +} +declare <4 x i32> @llvm.ppc.altivec.vinswvlx(i64, <4 x i32>) + +define <4 x i32> @testVINSWVRX(i64 %a, <4 x i32> %b) { +; CHECK-LABEL: testVINSWVRX: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinswvrx v2, r3, v2 +; CHECK-NEXT: blr +entry: + %0 = tail call <4 x i32> @llvm.ppc.altivec.vinswvrx(i64 %a, <4 x i32> %b) + ret <4 x i32> %0 +} +declare <4 x i32> @llvm.ppc.altivec.vinswvrx(i64, <4 x i32>) + +define <4 x i32> @testVINSW(i64 %a) { +; CHECK-LABEL: testVINSW: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsw v2, r3, 1 +; CHECK-NEXT: blr +entry: + %0 = tail call <4 x i32> @llvm.ppc.altivec.vinsw(i64 %a, i32 1) + ret 
<4 x i32> %0 +} +declare <4 x i32> @llvm.ppc.altivec.vinsw(i64, i32 immarg) + +define <2 x i64> @testVINSD(i64 %a) { +; CHECK-LABEL: testVINSD: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsd v2, r3, 1 +; CHECK-NEXT: blr +entry: + %0 = tail call <2 x i64> @llvm.ppc.altivec.vinsd(i64 %a, i32 1) + ret <2 x i64> %0 +} +declare <2 x i64> @llvm.ppc.altivec.vinsd(i64, i32 immarg) + +define <16 x i8> @testVSLDBI(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: testVSLDBI: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsldbi v2, v2, v3, 1 +; CHECK-NEXT: blr +entry: + %0 = tail call <16 x i8> @llvm.ppc.altivec.vsldbi(<16 x i8> %a, <16 x i8> %b, i32 1) + ret <16 x i8> %0 +} +declare <16 x i8> @llvm.ppc.altivec.vsldbi(<16 x i8>, <16 x i8>, i32 immarg) + +define <16 x i8> @testVSRDBI(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: testVSRDBI: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrdbi v2, v2, v3, 1 +; CHECK-NEXT: blr +entry: + %0 = tail call <16 x i8> @llvm.ppc.altivec.vsrdbi(<16 x i8> %a, <16 x i8> %b, i32 1) + ret <16 x i8> %0 +} +declare <16 x i8> @llvm.ppc.altivec.vsrdbi(<16 x i8>, <16 x i8>, i32 immarg) + +define <4 x i32> @testXXSPLTIW() { +; CHECK-LABEL: testXXSPLTIW: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltiw v2, 1 +; CHECK-NEXT: blr +entry: + %0 = tail call <4 x i32> @llvm.ppc.vsx.xxspltiw(i32 1) + ret <4 x i32> %0 +} +declare <4 x i32> @llvm.ppc.vsx.xxspltiw(i32 immarg) + +define <2 x double> @testXXSPLTIDP() { +; CHECK-LABEL: testXXSPLTIDP: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltidp v2, 1 +; CHECK-NEXT: blr +entry: + %0 = tail call <2 x double> @llvm.ppc.vsx.xxspltidp(i32 1) + ret <2 x double> %0 +} +declare <2 x double> @llvm.ppc.vsx.xxspltidp(i32 immarg) + +define <4 x i32> @testXXSPLTI32DX(<4 x i32> %a) { +; CHECK-LABEL: testXXSPLTI32DX: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxsplti32dx v2, -1, 1 +; CHECK-NEXT: blr +entry: + %0 = tail call <4 x i32> @llvm.ppc.vsx.xxsplti32dx(<4 x i32> %a, i1 true, i32 1) + ret <4 x i32> %0 +} +declare <4 x i32> @llvm.ppc.vsx.xxsplti32dx(<4 x i32>, i1 immarg, i32 immarg) + +define <16 x i8> @testXXPERMX(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: testXXPERMX: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxpermx v2, v2, v3, v4, 1 +; CHECK-NEXT: blr +entry: + %0 = tail call <16 x i8> @llvm.ppc.vsx.xxpermx(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, i32 1) + ret <16 x i8> %0 +} +declare <16 x i8> @llvm.ppc.vsx.xxpermx(<16 x i8>, <16 x i8>, <16 x i8>, i32 immarg) + +define <16 x i8> @testXXBLENDVB(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: testXXBLENDVB: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxblendvb v2, v2, v3, v4 +; CHECK-NEXT: blr +entry: + %0 = tail call <16 x i8> @llvm.ppc.vsx.xxblendvb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) + ret <16 x i8> %0 +} +declare <16 x i8> @llvm.ppc.vsx.xxblendvb(<16 x i8>, <16 x i8>, <16 x i8>) + +define <8 x i16> @testXXBLENDVH(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) { +; CHECK-LABEL: testXXBLENDVH: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxblendvh v2, v2, v3, v4 +; CHECK-NEXT: blr +entry: + %0 = tail call <8 x i16> @llvm.ppc.vsx.xxblendvh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) + ret <8 x i16> %0 +} +declare <8 x i16> @llvm.ppc.vsx.xxblendvh(<8 x i16>, <8 x i16>, <8 x i16>) + +define <4 x i32> @testXXBLENDVW(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: testXXBLENDVW: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxblendvw v2, v2, v3, v4 +; CHECK-NEXT: blr +entry: + %0 = tail call <4 x i32> @llvm.ppc.vsx.xxblendvw(<4 x i32> %a, <4 x i32> %b, <4 x 
i32> %c) + ret <4 x i32> %0 +} +declare <4 x i32> @llvm.ppc.vsx.xxblendvw(<4 x i32>, <4 x i32>, <4 x i32>) + +define <2 x i64> @testXXBLENDVD(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { +; CHECK-LABEL: testXXBLENDVD: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxblendvd v2, v2, v3, v4 +; CHECK-NEXT: blr +entry: + %0 = tail call <2 x i64> @llvm.ppc.vsx.xxblendvd(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) + ret <2 x i64> %0 +} +declare <2 x i64> @llvm.ppc.vsx.xxblendvd(<2 x i64>, <2 x i64>, <2 x i64>) Index: llvm/lib/Target/PowerPC/PPCScheduleP9.td =================================================================== --- llvm/lib/Target/PowerPC/PPCScheduleP9.td +++ llvm/lib/Target/PowerPC/PPCScheduleP9.td @@ -42,7 +42,8 @@ // Do not support QPX (Quad Processing eXtension), SPE (Signal Processing // Engine), prefixed instructions on Power 9 or PC relative mem ops. - let UnsupportedFeatures = [HasQPX, HasSPE, PrefixInstrs, PCRelativeMemops]; + let UnsupportedFeatures = [HasQPX, HasSPE, PrefixInstrs, PCRelativeMemops, + IsISA3_1]; } Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td =================================================================== --- llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -190,7 +190,162 @@ isPCRel; } +class 8RR_XX4_IMM3_XTABC6<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, + list<dag> pattern> + : PI<1, opcode, OOL, IOL, asmstr, itin> { + bits<6> XT; + bits<6> XA; + bits<6> XB; + bits<6> XC; + bits<3> IMM; + + let Pattern = pattern; + + // The prefix. + let Inst{6-7} = 1; + let Inst{8-11} = 0; + let Inst{12-13} = 0; + let Inst{14-28} = 0; + let Inst{29-31} = IMM; + + // The instruction. + let Inst{38-42} = XT{4-0}; + let Inst{43-47} = XA{4-0}; + let Inst{48-52} = XB{4-0}; + let Inst{53-57} = XC{4-0}; + let Inst{58-59} = xo; + let Inst{60} = XC{5}; + let Inst{61} = XA{5}; + let Inst{62} = XB{5}; + let Inst{63} = XT{5}; +} + +class 8RR_XX4_XTABC6<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : PI<1, opcode, OOL, IOL, asmstr, itin> { + bits<6> XT; + bits<6> XA; + bits<6> XB; + bits<6> XC; + + let Pattern = pattern; + + // The prefix. + let Inst{6-7} = 1; + let Inst{8-11} = 0; + let Inst{12-13} = 0; + let Inst{14-31} = 0; + + // The instruction. + let Inst{38-42} = XT{4-0}; + let Inst{43-47} = XA{4-0}; + let Inst{48-52} = XB{4-0}; + let Inst{53-57} = XC{4-0}; + let Inst{58-59} = xo; + let Inst{60} = XC{5}; + let Inst{61} = XA{5}; + let Inst{62} = XB{5}; + let Inst{63} = XT{5}; +} + +// VX-Form: [PO VRT / UIM RB XO]. 
+// We use VXForm_1 to implement it, that is, we use "VRA" (5 bit) to represent +// "/ UIM" (unused bit followed by a 4-bit immediate) +class VX_VRT5_UIM5_RB5<bits<11> xo, string opc, list<dag> pattern> + : VXForm_1<xo, (outs vrrc:$VRT), (ins u4imm:$UIM, g8rc:$RB), + !strconcat(opc, " $VRT, $RB, $UIM"), IIC_VecGeneral, pattern>; + +class VXForm_RD5_MP_VB5<bits<11> xo, bits<4> eo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> RD; + bits<5> VB; + bit MP; + + let Pattern = pattern; + + let Inst{6-10} = RD; + let Inst{11-14} = eo; + let Inst{15} = MP; + let Inst{16-20} = VB; + let Inst{21-31} = xo; +} + +// 8RR:D-Form: [ 1 1 0 // // imm0 +// PO T XO TX imm1 ] +class 8RR_DForm_IMM32_XT6<bits<6> opcode, bits<4> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, + list<dag> pattern> + : PI<1, opcode, OOL, IOL, asmstr, itin> { + bits<6> XT; + bits<32> IMM32; + + let Pattern = pattern; + + // The prefix. + let Inst{6-7} = 1; + let Inst{8-11} = 0; + let Inst{12-13} = 0; // reserved + let Inst{14-15} = 0; // reserved + let Inst{16-31} = IMM32{31-16}; + + // The instruction. + let Inst{38-42} = XT{4-0}; + let Inst{43-46} = xo; + let Inst{47} = XT{5}; + let Inst{48-63} = IMM32{15-0}; +} + +// 8RR:D-Form: [ 1 1 0 // // imm0 +// PO T XO IX TX imm1 ] +class 8RR_DForm_IMM32_XT6_IX<bits<6> opcode, bits<3> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, + list<dag> pattern> + : PI<1, opcode, OOL, IOL, asmstr, itin> { + bits<6> XT; + bit IX; + bits<32> IMM32; + + let Pattern = pattern; + + // The prefix. + let Inst{6-7} = 1; + let Inst{8-11} = 0; + let Inst{12-13} = 0; // reserved + let Inst{14-15} = 0; // reserved + let Inst{16-31} = IMM32{31-16}; + + // The instruction. + let Inst{38-42} = XT{4-0}; + let Inst{43-45} = xo; + let Inst{46} = IX; + let Inst{47} = XT{5}; + let Inst{48-63} = IMM32{15-0}; +} + +// VN-Form: [PO VRT VRA VRB PS SD XO] +// SD is "Shift Direction" +class VNForm_VTAB5_SD3<bits<6> xo, bits<2> ps, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VRT; + bits<5> VRA; + bits<5> VRB; + bits<3> SD; + + let Pattern = pattern; + + let Inst{6-10} = VRT; + let Inst{11-15} = VRA; + let Inst{16-20} = VRB; + let Inst{21-22} = ps; + let Inst{23-25} = SD; + let Inst{26-31} = xo; +} + def PrefixInstrs : Predicate<"PPCSubTarget->hasPrefixInstrs()">; +def IsISA3_1 : Predicate<"PPCSubTarget->isISA3_1()">; let Predicates = [PrefixInstrs] in { let Interpretation64Bit = 1, isCodeGenOnly = 1 in { @@ -500,3 +655,140 @@ def : Pat<(PPCmatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>; } +let Predicates = [IsISA3_1] in { + def VINSBVLX : + VXForm_1 + <15, (outs vrrc:$vD), (ins g8rc:$rA, vrrc:$vB), + "vinsbvlx $vD, $rA, $vB", IIC_VecGeneral, + [(set v16i8:$vD, (int_ppc_altivec_vinsbvlx + i64:$rA, v16i8:$vB))]>; + def VINSBVRX : + VXForm_1 + <271, (outs vrrc:$vD), (ins g8rc:$rA, vrrc:$vB), + "vinsbvrx $vD, $rA, $vB", IIC_VecGeneral, + [(set v16i8:$vD, (int_ppc_altivec_vinsbvrx + i64:$rA, v16i8:$vB))]>; + def VINSHVLX : + VXForm_1 + <79, (outs vrrc:$vD), (ins g8rc:$rA, vrrc:$vB), + "vinshvlx $vD, $rA, $vB", IIC_VecGeneral, + [(set v8i16:$vD, (int_ppc_altivec_vinshvlx + i64:$rA, v8i16:$vB))]>; + def VINSHVRX : + VXForm_1 + <335, (outs vrrc:$vD), (ins g8rc:$rA, vrrc:$vB), + "vinshvrx $vD, $rA, $vB", IIC_VecGeneral, + [(set v8i16:$vD, (int_ppc_altivec_vinshvrx + i64:$rA, v8i16:$vB))]>; + def VINSWVLX : + VXForm_1 + <143, (outs vrrc:$vD), (ins g8rc:$rA, 
vrrc:$vB), + "vinswvlx $vD, $rA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (int_ppc_altivec_vinswvlx + i64:$rA, v4i32:$vB))]>; + def VINSWVRX : + VXForm_1 + <399, (outs vrrc:$vD), (ins g8rc:$rA, vrrc:$vB), + "vinswvrx $vD, $rA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (int_ppc_altivec_vinswvrx + i64:$rA, v4i32:$vB))]>; + def VINSBLX : VXForm_1<527, (outs vrrc:$vD), (ins g8rc:$rA, g8rc:$rB), + "vinsblx $vD, $rA, $rB", IIC_VecGeneral, + [(set v16i8:$vD, (int_ppc_altivec_vinsblx + i64:$rA, i64:$rB))]>; + def VINSBRX : VXForm_1<783, (outs vrrc:$vD), (ins g8rc:$rA, g8rc:$rB), + "vinsbrx $vD, $rA, $rB", IIC_VecGeneral, + [(set v16i8:$vD, (int_ppc_altivec_vinsbrx + i64:$rA, i64:$rB))]>; + def VINSHLX : VXForm_1<591, (outs vrrc:$vD), (ins g8rc:$rA, g8rc:$rB), + "vinshlx $vD, $rA, $rB", IIC_VecGeneral, + [(set v8i16:$vD, (int_ppc_altivec_vinshlx + i64:$rA, i64:$rB))]>; + def VINSHRX : VXForm_1<847, (outs vrrc:$vD), (ins g8rc:$rA, g8rc:$rB), + "vinshrx $vD, $rA, $rB", IIC_VecGeneral, + [(set v8i16:$vD, (int_ppc_altivec_vinshrx + i64:$rA, i64:$rB))]>; + def VINSWLX : VXForm_1<655, (outs vrrc:$vD), (ins g8rc:$rA, g8rc:$rB), + "vinswlx $vD, $rA, $rB", IIC_VecGeneral, + [(set v4i32:$vD, (int_ppc_altivec_vinswlx + i64:$rA, i64:$rB))]>; + def VINSWRX : VXForm_1<911, (outs vrrc:$vD), (ins g8rc:$rA, g8rc:$rB), + "vinswrx $vD, $rA, $rB", IIC_VecGeneral, + [(set v4i32:$vD, (int_ppc_altivec_vinswrx + i64:$rA, i64:$rB))]>; + def VINSDLX : VXForm_1<719, (outs vrrc:$vD), (ins g8rc:$rA, g8rc:$rB), + "vinsdlx $vD, $rA, $rB", IIC_VecGeneral, + [(set v2i64:$vD, (int_ppc_altivec_vinsdlx + i64:$rA, i64:$rB))]>; + def VINSDRX : VXForm_1<975, (outs vrrc:$vD), (ins g8rc:$rA, g8rc:$rB), + "vinsdrx $vD, $rA, $rB", IIC_VecGeneral, + [(set v2i64:$vD, (int_ppc_altivec_vinsdrx + i64:$rA, i64:$rB))]>; + def VINSW : VX_VRT5_UIM5_RB5<207, "vinsw", [(set v4i32:$VRT, (int_ppc_altivec_vinsw + i64:$RB, timm:$UIM))]>; + def VINSD : VX_VRT5_UIM5_RB5<463, "vinsd", [(set v2i64:$VRT, (int_ppc_altivec_vinsd + i64:$RB, timm:$UIM))]>; + def VSLDBI : VNForm_VTAB5_SD3<22, 0, (outs vrrc:$VRT), + (ins vrrc:$VRA, vrrc:$VRB, u3imm:$SH), + "vsldbi $VRT, $VRA, $VRB, $SH", + IIC_VecGeneral, + [(set v16i8:$VRT, (int_ppc_altivec_vsldbi + v16i8:$VRA, v16i8:$VRB, i32:$SH))]>; + def VSRDBI : VNForm_VTAB5_SD3<22, 1, (outs vrrc:$VRT), + (ins vrrc:$VRA, vrrc:$VRB, u3imm:$SH), + "vsrdbi $VRT, $VRA, $VRB, $SH", + IIC_VecGeneral, + [(set v16i8:$VRT, (int_ppc_altivec_vsrdbi + v16i8:$VRA, v16i8:$VRB, i32:$SH))]>; +} + +let Predicates = [PrefixInstrs] in { + def XXSPLTIW : 8RR_DForm_IMM32_XT6<32, 3, (outs vsrc:$XT), + (ins i32imm:$IMM32), + "xxspltiw $XT, $IMM32", IIC_VecGeneral, + []>; + def XXSPLTIDP : 8RR_DForm_IMM32_XT6<32, 2, (outs vsrc:$XT), + (ins i32imm:$IMM32), + "xxspltidp $XT, $IMM32", IIC_VecGeneral, + [(set v2f64:$XT, + (PPCxxspltidp i32:$IMM32))]>; + def XXSPLTI32DX : + 8RR_DForm_IMM32_XT6_IX<32, 0, (outs vsrc:$XT), + (ins vsrc:$XTi, i1imm:$IX, i32imm:$IMM32), + "xxsplti32dx $XT, $IX, $IMM32", IIC_VecGeneral, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">; + def XXPERMX : + 8RR_XX4_IMM3_XTABC6<34, 0, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, + vsrc:$XC, u3imm:$UIM), "xxpermx $XT, $XA, $XB, $XC, $UIM", + IIC_VecPerm, []>; + def XXBLENDVB : + 8RR_XX4_XTABC6<33, 0, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC), + "xxblendvb $XT, $XA, $XB, $XC", IIC_VecGeneral, []>; + def XXBLENDVH : + 8RR_XX4_XTABC6<33, 1, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC), + "xxblendvh $XT, $XA, $XB, $XC", IIC_VecGeneral, []>; + def XXBLENDVW : + 
8RR_XX4_XTABC6<33, 2, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC), + "xxblendvw $XT, $XA, $XB, $XC", IIC_VecGeneral, []>; + def XXBLENDVD : + 8RR_XX4_XTABC6<33, 3, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC), + "xxblendvd $XT, $XA, $XB, $XC", IIC_VecGeneral, []>; + + def : Pat<(int_ppc_vsx_xxspltiw timm:$A), + (XXSPLTIW $A)>; + def : Pat<(int_ppc_vsx_xxspltidp timm:$A), + (XXSPLTIDP $A)>; + def : Pat<(int_ppc_vsx_xxsplti32dx v4i32:$A, i1:$B, timm:$C), + (XXSPLTI32DX $A, $B, $C)>; + def : Pat<(v16i8 (int_ppc_vsx_xxpermx v16i8:$A, v16i8:$B, v16i8:$C , timm:$D)), + (COPY_TO_REGCLASS (XXPERMX (COPY_TO_REGCLASS $A, VSRC), (COPY_TO_REGCLASS $B, VSRC), (COPY_TO_REGCLASS $C, VSRC), $D), VSRC)>; + def : Pat<(v16i8 (int_ppc_vsx_xxblendvb v16i8:$A, v16i8:$B, v16i8:$C)), + (COPY_TO_REGCLASS (XXBLENDVB (COPY_TO_REGCLASS $A, VSRC), (COPY_TO_REGCLASS $B, VSRC), (COPY_TO_REGCLASS $C, VSRC)), VSRC)>; + def : Pat<(v8i16 (int_ppc_vsx_xxblendvh v8i16:$A, v8i16:$B, v8i16:$C)), + (COPY_TO_REGCLASS (XXBLENDVH (COPY_TO_REGCLASS $A, VSRC), (COPY_TO_REGCLASS $B, VSRC), (COPY_TO_REGCLASS $C, VSRC)), VSRC)>; + def : Pat<(int_ppc_vsx_xxblendvw v4i32:$A, v4i32:$B, v4i32:$C), + (XXBLENDVW $A, $B, $C)>; + def : Pat<(int_ppc_vsx_xxblendvd v2i64:$A, v2i64:$B, v2i64:$C), + (XXBLENDVD $A, $B, $C)>; +} + Index: llvm/lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -50,6 +50,10 @@ SDTCisVec<1>, SDTCisInt<2> ]>; +def SDT_PPCSpToDp : SDTypeProfile<1, 1, [ SDTCisVT<0, v2f64>, + SDTCisInt<1> +]>; + def SDT_PPCVecShift : SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisPtrTy<3> ]>; @@ -194,6 +198,7 @@ def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>; +def PPCxxspltidp : SDNode<"PPCISD::XXSPLTI_SP_TO_DP", SDT_PPCSpToDp, []>; def PPCvecinsert : SDNode<"PPCISD::VECINSERT", SDT_PPCVecInsert, []>; def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>; def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>; Index: llvm/lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.h +++ llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -97,6 +97,11 @@ /// XXSPLT, + /// XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for + /// converting immediate single precision numbers to double precision + /// vector or scalar. 
+ XXSPLTI_SP_TO_DP, + /// VECINSERT - The PPC vector insert instruction /// VECINSERT, Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1471,6 +1471,8 @@ case PPCISD::STFIWX: return "PPCISD::STFIWX"; case PPCISD::VPERM: return "PPCISD::VPERM"; case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; + case PPCISD::XXSPLTI_SP_TO_DP: + return "PPCISD::XXSPLTI_SP_TO_DP"; case PPCISD::VECINSERT: return "PPCISD::VECINSERT"; case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI"; case PPCISD::VECSHL: return "PPCISD::VECSHL"; Index: llvm/include/llvm/IR/IntrinsicsPowerPC.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -410,7 +410,62 @@ Intrinsic<[llvm_v2i64_ty],[llvm_v2i64_ty],[IntrNoMem]>; def int_ppc_altivec_vprtybq : GCCBuiltin<"__builtin_altivec_vprtybq">, Intrinsic<[llvm_v1i128_ty],[llvm_v1i128_ty],[IntrNoMem]>; - + //P10 Vector Insert + def int_ppc_altivec_vinsblx : GCCBuiltin<"__builtin_altivec_vinsblx">, + Intrinsic<[llvm_v16i8_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; + def int_ppc_altivec_vinsbrx : GCCBuiltin<"__builtin_altivec_vinsbrx">, + Intrinsic<[llvm_v16i8_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; + def int_ppc_altivec_vinshlx : GCCBuiltin<"__builtin_altivec_vinshlx">, + Intrinsic<[llvm_v8i16_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; + def int_ppc_altivec_vinshrx : GCCBuiltin<"__builtin_altivec_vinshrx">, + Intrinsic<[llvm_v8i16_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; + def int_ppc_altivec_vinswlx : GCCBuiltin<"__builtin_altivec_vinswlx">, + Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; + def int_ppc_altivec_vinswrx : GCCBuiltin<"__builtin_altivec_vinswrx">, + Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; + def int_ppc_altivec_vinsdlx : GCCBuiltin<"__builtin_altivec_vinsdlx">, + Intrinsic<[llvm_v2i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; + def int_ppc_altivec_vinsdrx : GCCBuiltin<"__builtin_altivec_vinsdrx">, + Intrinsic<[llvm_v2i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; + def int_ppc_altivec_vinsbvlx : GCCBuiltin<"__builtin_altivec_vinsbvlx">, + Intrinsic<[llvm_v16i8_ty], [llvm_i64_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_ppc_altivec_vinsbvrx : GCCBuiltin<"__builtin_altivec_vinsbvrx">, + Intrinsic<[llvm_v16i8_ty], [llvm_i64_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_ppc_altivec_vinshvlx : GCCBuiltin<"__builtin_altivec_vinshvlx">, + Intrinsic<[llvm_v8i16_ty], [llvm_i64_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_ppc_altivec_vinshvrx : GCCBuiltin<"__builtin_altivec_vinshvrx">, + Intrinsic<[llvm_v8i16_ty], [llvm_i64_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_ppc_altivec_vinswvlx : GCCBuiltin<"__builtin_altivec_vinswvlx">, + Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_ppc_altivec_vinswvrx : GCCBuiltin<"__builtin_altivec_vinswvrx">, + Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_ppc_altivec_vinsw : GCCBuiltin<"__builtin_altivec_vinsw">, + Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; + def int_ppc_altivec_vinsd : GCCBuiltin<"__builtin_altivec_vinsd">, + Intrinsic<[llvm_v2i64_ty], [llvm_i64_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; + //P10 Vector Shift Double Bit Immediate + 
def int_ppc_altivec_vsldbi : GCCBuiltin<"__builtin_altivec_vsldbi">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<2>>]>; + def int_ppc_altivec_vsrdbi : GCCBuiltin<"__builtin_altivec_vsrdbi">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<2>>]>; } // Vector average. @@ -939,6 +994,33 @@ PowerPC_VSX_Intrinsic<"xxinsertw",[llvm_v4i32_ty], [llvm_v4i32_ty,llvm_v2i64_ty,llvm_i32_ty], [IntrNoMem]>; +// P10 VSX Vector splat +def int_ppc_vsx_xxspltiw : GCCBuiltin<"__builtin_vsx_xxspltiw">, + Intrinsic<[llvm_v4i32_ty],[llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<0>>]>; +def int_ppc_vsx_xxspltidp: GCCBuiltin<"__builtin_vsx_xxspltidp">, + Intrinsic<[llvm_v2f64_ty],[llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<0>>]>; +def int_ppc_vsx_xxsplti32dx: GCCBuiltin<"__builtin_vsx_xxsplti32dx">, + Intrinsic<[llvm_v4i32_ty],[llvm_v4i32_ty,llvm_i1_ty,llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>; +// P10 VSX Vector permute extended +def int_ppc_vsx_xxpermx: GCCBuiltin<"__builtin_vsx_xxpermx">, + Intrinsic<[llvm_v16i8_ty],[llvm_v16i8_ty,llvm_v16i8_ty,llvm_v16i8_ty,llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<3>>]>; +// P10 VSX Vector blend variable +def int_ppc_vsx_xxblendvb: GCCBuiltin<"__builtin_vsx_xxblendvb">, + Intrinsic<[llvm_v16i8_ty],[llvm_v16i8_ty,llvm_v16i8_ty,llvm_v16i8_ty], + [IntrNoMem]>; +def int_ppc_vsx_xxblendvh: GCCBuiltin<"__builtin_vsx_xxblendvh">, + Intrinsic<[llvm_v8i16_ty],[llvm_v8i16_ty,llvm_v8i16_ty,llvm_v8i16_ty], + [IntrNoMem]>; +def int_ppc_vsx_xxblendvw: GCCBuiltin<"__builtin_vsx_xxblendvw">, + Intrinsic<[llvm_v4i32_ty],[llvm_v4i32_ty,llvm_v4i32_ty,llvm_v4i32_ty], + [IntrNoMem]>; +def int_ppc_vsx_xxblendvd: GCCBuiltin<"__builtin_vsx_xxblendvd">, + Intrinsic<[llvm_v2i64_ty],[llvm_v2i64_ty,llvm_v2i64_ty,llvm_v2i64_ty], + [IntrNoMem]>; } //===----------------------------------------------------------------------===// Index: clang/test/CodeGen/builtins-ppc-p10vector.c =================================================================== --- /dev/null +++ clang/test/CodeGen/builtins-ppc-p10vector.c @@ -0,0 +1,544 @@ +// REQUIRES: powerpc-registered-target +// RUN: %clang_cc1 -target-feature +vsx -target-feature +altivec \ +// RUN: -target-cpu pwr10 -triple powerpc64le-unknown-unknown -emit-llvm %s \ +// RUN: -o - | FileCheck %s + +// RUN: %clang_cc1 -target-feature +vsx -target-feature +altivec \ +// RUN: -target-cpu pwr10 -triple powerpc64-unknown-unknown -emit-llvm %s \ +// RUN: -o - | FileCheck %s -check-prefix=CHECK-BE + +// RUN: %clang_cc1 -target-feature +vsx -target-feature +altivec \ +// RUN: -target-cpu pwr10 -triple powerpc64le-unknown-unknown -emit-llvm %s \ +// RUN: -o - | FileCheck %s -check-prefix=CHECK-LE + +#include <altivec.h> + +vector signed char vsca, vscb; +vector unsigned char vuca, vucb, vucc; +vector signed short vssa, vssb; +vector unsigned short vusa, vusb, vusc; +vector signed int vsia, vsib; +vector unsigned int vuia, vuib, vuic; +vector signed long long vslla, vsllb; +vector unsigned long long vulla, vullb, vullc; +vector float vfa, vfb; +vector double vda, vdb; + +unsigned char uca; +unsigned short usa; +unsigned int uia, uib; +signed int sia; +const int cia; +float fa; +unsigned long long ulla; +unsigned long ula; +signed long long slla; +double da; + +vector unsigned char test_vec_insertl_uc(void) { + // CHECK-BE: @llvm.ppc.altivec.vinsblx(i64 %{{.+}}, i64 + // CHECK-BE-NEXT: ret <16 x i8> + // CHECK-LE: 
@llvm.ppc.altivec.vinsbrx(i64 %{{.+}}, i64 + // CHECK-LE-NEXT: ret <16 x i8> + return vec_insertl(uca, vuca, uia); +} + +vector unsigned short test_vec_insertl_us(void) { + // CHECK-BE: @llvm.ppc.altivec.vinshlx(i64 %{{.+}}, i64 + // CHECK-BE-NEXT: ret <8 x i16> + // CHECK-LE: @llvm.ppc.altivec.vinshrx(i64 %{{.+}}, i64 + // CHECK-LE-NEXT: ret <8 x i16> + return vec_insertl(usa, vusa, uia); +} + +vector unsigned int test_vec_insertl_ui(void) { + // CHECK-BE: @llvm.ppc.altivec.vinswlx(i64 %{{.+}}, i64 + // CHECK-BE-NEXT: ret <4 x i32> + // CHECK-LE: @llvm.ppc.altivec.vinswrx(i64 %{{.+}}, i64 + // CHECK-LE-NEXT: ret <4 x i32> + return vec_insertl(uib, vuia, uia); +} + +vector unsigned long long test_vec_insertl_ul(void) { + // CHECK-BE: @llvm.ppc.altivec.vinsdlx(i64 %{{.+}}, i64 + // CHECK-BE-NEXT: ret <2 x i64> + // CHECK-LE: @llvm.ppc.altivec.vinsdrx(i64 %{{.+}}, i64 + // CHECK-LE-NEXT: ret <2 x i64> + return vec_insertl(ulla, vulla, uia); +} + +vector unsigned char test_vec_insertl_ucv(void) { + // CHECK-BE: @llvm.ppc.altivec.vinsbvlx(i64 %{{.+}}, <16 x i8> + // CHECK-BE-NEXT: ret <16 x i8> + // CHECK-LE: @llvm.ppc.altivec.vinsbvrx(i64 %{{.+}}, <16 x i8> + // CHECK-LE-NEXT: ret <16 x i8> + return vec_insertl(vuca, vucb, uia); +} + +vector unsigned short test_vec_insertl_usv(void) { + // CHECK-BE: @llvm.ppc.altivec.vinshvlx(i64 %{{.+}}, <8 x i16> + // CHECK-BE-NEXT: ret <8 x i16> + // CHECK-LE: @llvm.ppc.altivec.vinshvrx(i64 %{{.+}}, <8 x i16> + // CHECK-LE-NEXT: ret <8 x i16> + return vec_insertl(vusa, vusb, uia); +} + +vector unsigned int test_vec_insertl_uiv(void) { + // CHECK-BE: @llvm.ppc.altivec.vinswvlx(i64 %{{.+}}, <4 x i32> + // CHECK-BE-NEXT: ret <4 x i32> + // CHECK-LE: @llvm.ppc.altivec.vinswvrx(i64 %{{.+}}, <4 x i32> + // CHECK-LE-NEXT: ret <4 x i32> + return vec_insertl(vuia, vuib, uia); +} + +vector unsigned char test_vec_inserth_uc(void) { + // CHECK-BE: @llvm.ppc.altivec.vinsbrx(i64 %{{.+}}, i64 + // CHECK-BE-NEXT: ret <16 x i8> + // CHECK-LE: @llvm.ppc.altivec.vinsblx(i64 %{{.+}}, i64 + // CHECK-LE-NEXT: ret <16 x i8> + return vec_inserth(uca, vuca, uia); +} + +vector unsigned short test_vec_inserth_us(void) { + // CHECK-BE: @llvm.ppc.altivec.vinshrx(i64 %{{.+}}, i64 + // CHECK-BE-NEXT: ret <8 x i16> + // CHECK-LE: @llvm.ppc.altivec.vinshlx(i64 %{{.+}}, i64 + // CHECK-LE-NEXT: ret <8 x i16> + return vec_inserth(usa, vusa, uia); +} + +vector unsigned int test_vec_inserth_ui(void) { + // CHECK-BE: @llvm.ppc.altivec.vinswrx(i64 %{{.+}}, i64 + // CHECK-BE-NEXT: ret <4 x i32> + // CHECK-LE: @llvm.ppc.altivec.vinswlx(i64 %{{.+}}, i64 + // CHECK-LE-NEXT: ret <4 x i32> + return vec_inserth(uib, vuia, uia); +} + +vector unsigned long long test_vec_inserth_ul(void) { + // CHECK-BE: @llvm.ppc.altivec.vinsdrx(i64 %{{.+}}, i64 + // CHECK-BE-NEXT: ret <2 x i64> + // CHECK-LE: @llvm.ppc.altivec.vinsdlx(i64 %{{.+}}, i64 + // CHECK-LE-NEXT: ret <2 x i64> + return vec_inserth(ulla, vulla, uia); +} + +vector unsigned char test_vec_inserth_ucv(void) { + // CHECK-BE: @llvm.ppc.altivec.vinsbvrx(i64 %{{.+}}, <16 x i8> + // CHECK-BE-NEXT: ret <16 x i8> + // CHECK-LE: @llvm.ppc.altivec.vinsbvlx(i64 %{{.+}}, <16 x i8> + // CHECK-LE-NEXT: ret <16 x i8> + return vec_inserth(vuca, vucb, uia); +} + +vector unsigned short test_vec_inserth_usv(void) { + // CHECK-BE: @llvm.ppc.altivec.vinshvrx(i64 %{{.+}}, <8 x i16> + // CHECK-BE-NEXT: ret <8 x i16> + // CHECK-LE: @llvm.ppc.altivec.vinshvlx(i64 %{{.+}}, <8 x i16> + // CHECK-LE-NEXT: ret <8 x i16> + return vec_inserth(vusa, vusb, uia); +} + +vector 
unsigned int test_vec_inserth_uiv(void) { + // CHECK-BE: @llvm.ppc.altivec.vinswvrx(i64 %{{.+}}, <4 x i32> + // CHECK-BE-NEXT: ret <4 x i32> + // CHECK-LE: @llvm.ppc.altivec.vinswvlx(i64 %{{.+}}, <4 x i32> + // CHECK-LE-NEXT: ret <4 x i32> + return vec_inserth(vuia, vuib, uia); +} + +vector signed int test_vec_replace_elt_si(void) { + // CHECK-BE: @llvm.ppc.altivec.vinsw(i64 %{{.+}}, i32 + // CHECK-BE-NEXT: ret <4 x i32> + // CHECK-LE: @llvm.ppc.altivec.vinsw(i64 %{{.+}}, i32 + // CHECK-LE-NEXT: ret <4 x i32> + return vec_replace_elt(vsia, sia, 0); +} + +vector unsigned int test_vec_replace_elt_ui(void) { + // CHECK-BE: @llvm.ppc.altivec.vinsw(i64 %{{.+}}, i32 + // CHECK-BE-NEXT: ret <4 x i32> + // CHECK-LE: @llvm.ppc.altivec.vinsw(i64 %{{.+}}, i32 + // CHECK-LE-NEXT: ret <4 x i32> + return vec_replace_elt(vuia, uia, 0); +} + +vector float test_vec_replace_elt_f(void) { + // CHECK-BE: @llvm.ppc.altivec.vinsw(i64 %{{.+}}, i32 + // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <4 x float> + // CHECK-BE-NEXT: ret <4 x float> + // CHECK-LE: @llvm.ppc.altivec.vinsw(i64 %{{.+}}, i32 + // CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <4 x float> + // CHECK-LE-NEXT: ret <4 x float> + return vec_replace_elt(vfa, fa, 0); +} + +vector signed long long test_vec_replace_elt_sll(void) { + // CHECK-BE: @llvm.ppc.altivec.vinsd(i64 %{{.+}}, i32 + // CHECK-BE-NEXT: ret <2 x i64> + // CHECK-LE: @llvm.ppc.altivec.vinsd(i64 %{{.+}}, i32 + // CHECK-LE-NEXT: ret <2 x i64> + return vec_replace_elt(vslla, slla, 0); +} + +vector unsigned long long test_vec_replace_elt_ull(void) { + // CHECK-BE: @llvm.ppc.altivec.vinsd(i64 %{{.+}}, i32 + // CHECK-BE-NEXT: ret <2 x i64> + // CHECK-LE: @llvm.ppc.altivec.vinsd(i64 %{{.+}}, i32 + // CHECK-LE-NEXT: ret <2 x i64> + return vec_replace_elt(vulla, ulla, 0); +} + +vector double test_vec_replace_elt_d(void) { + // CHECK-BE: @llvm.ppc.altivec.vinsd(i64 %{{.+}}, i32 + // CHECK-BE: bitcast <2 x i64> %{{.*}} to <2 x double> + // CHECK-BE-NEXT: ret <2 x double> + // CHECK-LE: @llvm.ppc.altivec.vinsd(i64 %{{.+}}, i32 + // CHECK-LE: bitcast <2 x i64> %{{.*}} to <2 x double> + // CHECK-LE-NEXT: ret <2 x double> + return vec_replace_elt(vda, da, 0); +} + +vector unsigned char test_vec_replace_unaligned_si(void) { + // CHECK-BE: @llvm.ppc.altivec.vinsw(i64 %{{.+}}, i32 + // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8> + // CHECK-BE-NEXT: ret <16 x i8> + // CHECK-LE: @llvm.ppc.altivec.vinsw(i64 %{{.+}}, i32 + // CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8> + // CHECK-LE-NEXT: ret <16 x i8> + return vec_replace_unaligned(vsia, sia, 0); +} + +vector unsigned char test_vec_replace_unaligned_ui(void) { + // CHECK-BE: @llvm.ppc.altivec.vinsw(i64 %{{.+}}, i32 + // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8> + // CHECK-BE-NEXT: ret <16 x i8> + // CHECK-LE: @llvm.ppc.altivec.vinsw(i64 %{{.+}}, i32 + // CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8> + // CHECK-LE-NEXT: ret <16 x i8> + return vec_replace_unaligned(vuia, uia, 0); +} + +vector unsigned char test_vec_replace_unaligned_f(void) { + // CHECK-BE: @llvm.ppc.altivec.vinsw(i64 %{{.+}}, i32 + // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8> + // CHECK-BE-NEXT: ret <16 x i8> + // CHECK-LE: @llvm.ppc.altivec.vinsw(i64 %{{.+}}, i32 + // CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8> + // CHECK-LE-NEXT: ret <16 x i8> + return vec_replace_unaligned(vfa, fa, 0); +} + +vector unsigned char test_vec_replace_unaligned_sll(void) { + // CHECK-BE: @llvm.ppc.altivec.vinsd(i64 %{{.+}}, i32 + // CHECK-BE-NEXT: 
bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: @llvm.ppc.altivec.vinsd(i64 %{{.+}}, i32
+  // CHECK-LE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vslla, slla, 0);
+}
+
+vector unsigned char test_vec_replace_unaligned_ull(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(i64 %{{.+}}, i32
+  // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: @llvm.ppc.altivec.vinsd(i64 %{{.+}}, i32
+  // CHECK-LE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vulla, ulla, 0);
+}
+
+vector unsigned char test_vec_replace_unaligned_d(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(i64 %{{.+}}, i32
+  // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: @llvm.ppc.altivec.vinsd(i64 %{{.+}}, i32
+  // CHECK-LE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vda, da, 0);
+}
+
+vector signed char test_vec_sldb_sc(void) {
+  // CHECK: @llvm.ppc.altivec.vsldbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_sldb(vsca, vscb, 0);
+}
+
+vector unsigned char test_vec_sldb_uc(void) {
+  // CHECK: @llvm.ppc.altivec.vsldbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_sldb(vuca, vucb, 0);
+}
+
+vector signed short test_vec_sldb_ss(void) {
+  // CHECK: @llvm.ppc.altivec.vsldbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <8 x i16>
+  // CHECK-NEXT: ret <8 x i16>
+  return vec_sldb(vssa, vssb, 0);
+}
+
+vector unsigned short test_vec_sldb_us(void) {
+  // CHECK: @llvm.ppc.altivec.vsldbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <8 x i16>
+  // CHECK-NEXT: ret <8 x i16>
+  return vec_sldb(vusa, vusb, 0);
+}
+
+vector signed int test_vec_sldb_si(void) {
+  // CHECK: @llvm.ppc.altivec.vsldbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <4 x i32>
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_sldb(vsia, vsib, 0);
+}
+
+vector unsigned int test_vec_sldb_ui(void) {
+  // CHECK: @llvm.ppc.altivec.vsldbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <4 x i32>
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_sldb(vuia, vuib, 0);
+}
+
+vector signed long long test_vec_sldb_sll(void) {
+  // CHECK: @llvm.ppc.altivec.vsldbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <2 x i64>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_sldb(vslla, vsllb, 0);
+}
+
+vector unsigned long long test_vec_sldb_ull(void) {
+  // CHECK: @llvm.ppc.altivec.vsldbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <2 x i64>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_sldb(vulla, vullb, 0);
+}
+
+vector signed char test_vec_srdb_sc(void) {
+  // CHECK: @llvm.ppc.altivec.vsrdbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_srdb(vsca, vscb, 0);
+}
+
+vector unsigned char test_vec_srdb_uc(void) {
+  // CHECK: @llvm.ppc.altivec.vsrdbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_srdb(vuca, vucb, 0);
+}
+
+vector signed short test_vec_srdb_ss(void) {
+  // CHECK: @llvm.ppc.altivec.vsrdbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <8 x i16>
+  // CHECK-NEXT: ret <8 x i16>
+  return vec_srdb(vssa, vssb, 0);
+}
+
+vector unsigned short test_vec_srdb_us(void) {
+  // CHECK: @llvm.ppc.altivec.vsrdbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <8 x i16>
+  // CHECK-NEXT: ret <8 x i16>
+  return vec_srdb(vusa, vusb, 0);
+}
+
+vector signed int test_vec_srdb_si(void) {
+  // CHECK: @llvm.ppc.altivec.vsrdbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <4 x i32>
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_srdb(vsia, vsib, 0);
+}
+
+vector unsigned int test_vec_srdb_ui(void) {
+  // CHECK: @llvm.ppc.altivec.vsrdbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <4 x i32>
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_srdb(vuia, vuib, 0);
+}
+
+vector signed long long test_vec_srdb_sll(void) {
+  // CHECK: @llvm.ppc.altivec.vsrdbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <2 x i64>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_srdb(vslla, vsllb, 0);
+}
+
+vector unsigned long long test_vec_srdb_ull(void) {
+  // CHECK: @llvm.ppc.altivec.vsrdbi(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <2 x i64>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_srdb(vulla, vullb, 0);
+}
+
+vector signed char test_vec_blend_sc(void) {
+  // CHECK: @llvm.ppc.vsx.xxblendvb(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8>
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_blendv(vsca, vscb, vucc);
+}
+
+vector unsigned char test_vec_blend_uc(void) {
+  // CHECK: @llvm.ppc.vsx.xxblendvb(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8>
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_blendv(vuca, vucb, vucc);
+}
+
+vector signed short test_vec_blend_ss(void) {
+  // CHECK: @llvm.ppc.vsx.xxblendvh(<8 x i16> %{{.+}}, <8 x i16> %{{.+}}, <8 x i16>
+  // CHECK-NEXT: ret <8 x i16>
+  return vec_blendv(vssa, vssb, vusc);
+}
+
+vector unsigned short test_vec_blend_us(void) {
+  // CHECK: @llvm.ppc.vsx.xxblendvh(<8 x i16> %{{.+}}, <8 x i16> %{{.+}}, <8 x i16>
+  // CHECK-NEXT: ret <8 x i16>
+  return vec_blendv(vusa, vusb, vusc);
+}
+
+vector signed int test_vec_blend_si(void) {
+  // CHECK: @llvm.ppc.vsx.xxblendvw(<4 x i32> %{{.+}}, <4 x i32> %{{.+}}, <4 x i32>
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_blendv(vsia, vsib, vuic);
+}
+
+vector unsigned int test_vec_blend_ui(void) {
+  // CHECK: @llvm.ppc.vsx.xxblendvw(<4 x i32> %{{.+}}, <4 x i32> %{{.+}}, <4 x i32>
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_blendv(vuia, vuib, vuic);
+}
+
+vector signed long long test_vec_blend_sll(void) {
+  // CHECK: @llvm.ppc.vsx.xxblendvd(<2 x i64> %{{.+}}, <2 x i64> %{{.+}}, <2 x i64>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_blendv(vslla, vsllb, vullc);
+}
+
+vector unsigned long long test_vec_blend_ull(void) {
+  // CHECK: @llvm.ppc.vsx.xxblendvd(<2 x i64> %{{.+}}, <2 x i64> %{{.+}}, <2 x i64>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_blendv(vulla, vullb, vullc);
+}
+
+vector float test_vec_blend_f(void) {
+  // CHECK: @llvm.ppc.vsx.xxblendvw(<4 x i32> %{{.+}}, <4 x i32> %{{.+}}, <4 x i32>
+  // CHECK-NEXT: bitcast <4 x i32> %{{.*}} to <4 x float>
+  // CHECK-NEXT: ret <4 x float>
+  return vec_blendv(vfa, vfb, vuic);
+}
+
+vector double test_vec_blend_d(void) {
+  // CHECK: @llvm.ppc.vsx.xxblendvd(<2 x i64> %{{.+}}, <2 x i64> %{{.+}}, <2 x i64>
+  // CHECK-NEXT: bitcast <2 x i64> %{{.*}} to <2 x double>
+  // CHECK-NEXT: ret <2 x double>
+  return vec_blendv(vda, vdb, vullc);
+}
+
+vector signed char test_vec_permx_sc(void) {
+  // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_permx(vsca, vscb, vucc, 0);
+}
+
+vector unsigned char test_vec_permx_uc(void) {
+  // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_permx(vuca, vucb, vucc, 0);
+}
+
+vector signed short test_vec_permx_ss(void) {
+  // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <8 x i16>
+  // CHECK-NEXT: ret <8 x i16>
+  return vec_permx(vssa, vssb, vucc, 0);
+}
+
+vector unsigned short test_vec_permx_us(void) {
+  // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <8 x i16>
+  // CHECK-NEXT: ret <8 x i16>
+  return vec_permx(vusa, vusb, vucc, 0);
+}
+
+vector signed int test_vec_permx_si(void) {
+  // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <4 x i32>
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_permx(vsia, vsib, vucc, 0);
+}
+
+vector unsigned int test_vec_permx_ui(void) {
+  // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <4 x i32>
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_permx(vuia, vuib, vucc, 0);
+}
+
+vector signed long long test_vec_permx_sll(void) {
+  // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <2 x i64>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_permx(vslla, vsllb, vucc, 0);
+}
+
+vector unsigned long long test_vec_permx_ull(void) {
+  // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <2 x i64>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_permx(vulla, vullb, vucc, 0);
+}
+
+vector float test_vec_permx_f(void) {
+  // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <4 x float>
+  // CHECK-NEXT: ret <4 x float>
+  return vec_permx(vfa, vfb, vucc, 0);
+}
+
+vector double test_vec_permx_d(void) {
+  // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <2 x double>
+  // CHECK-NEXT: ret <2 x double>
+  return vec_permx(vda, vdb, vucc, 0);
+}
+
+vector signed int test_vec_vec_splati_si(void) {
+  // CHECK: ret <4 x i32>
+  return vec_splati(-1);
+}
+
+vector unsigned int test_vec_vec_splati_ui(void) {
+  // CHECK: ret <4 x i32>
+  return vec_splati(1);
+}
+
+vector float test_vec_vec_splati_f(void) {
+  // CHECK: ret <4 x float>
+  return vec_splati(1.0f);
+}
+
+vector double test_vec_vec_splatid(void) {
+  // CHECK: ret <2 x double>
+  return vec_splatid(1.0);
+}
+
+vector signed int test_vec_vec_splati_ins_si(void) {
+  // CHECK-BE: insertelement <4 x i32>
+  // CHECK-BE: ret <4 x i32>
+  // CHECK-LE: insertelement <4 x i32>
+  // CHECK-LE: ret <4 x i32>
+  return vec_splati_ins(vsia, 0, 1);
+}
+
+vector unsigned int test_vec_vec_splati_ins_ui(void) {
+  // CHECK-BE: insertelement <4 x i32>
+  // CHECK-BE: ret <4 x i32>
+  // CHECK-LE: insertelement <4 x i32>
+  // CHECK-LE: ret <4 x i32>
+  return vec_splati_ins(vuia, 0, (unsigned int)1);
+}
+
+vector float test_vec_vec_splati_ins_f(void) {
+  // CHECK-BE: insertelement <4 x float>
+  // CHECK-BE: ret <4 x float>
+  // CHECK-LE: insertelement <4 x float>
+  // CHECK-LE: ret <4 x float>
+  return vec_splati_ins(vfa, 0, 1.0);
+}
Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -16761,6 +16761,425 @@
 static vector signed char __ATTRS_o_ai vec_nabs(vector signed char __a) {
   return __builtin_altivec_vminsb(__a, -__a);
 }
+
+#ifdef __POWER10_VECTOR__
+/* vec_insertl */
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_insertl(unsigned char __a, vector unsigned char __b, unsigned int __c) {
+#ifdef __LITTLE_ENDIAN__
+  return __builtin_altivec_vinsbrx(__c, __a);
+#else
+  return __builtin_altivec_vinsblx(__c, __a);
+#endif
+}
+
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_insertl(unsigned short __a, vector unsigned short __b, unsigned int __c) {
+#ifdef __LITTLE_ENDIAN__
+  return __builtin_altivec_vinshrx(__c, __a);
+#else
+  return __builtin_altivec_vinshlx(__c, __a);
+#endif
+}
+
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_insertl(unsigned int __a, vector unsigned int __b, unsigned int __c) {
+#ifdef __LITTLE_ENDIAN__
+  return __builtin_altivec_vinswrx(__c, __a);
+#else
+  return __builtin_altivec_vinswlx(__c, __a);
+#endif
+}
+
+static __inline__ vector unsigned long long __ATTRS_o_ai vec_insertl(
+    unsigned long long __a, vector unsigned long long __b, unsigned int __c) {
+#ifdef __LITTLE_ENDIAN__
+  return __builtin_altivec_vinsdrx(__c, __a);
+#else
+  return __builtin_altivec_vinsdlx(__c, __a);
+#endif
+}
+
+static __inline__ vector unsigned char __ATTRS_o_ai vec_insertl(
+    vector unsigned char __a, vector unsigned char __b, unsigned int __c) {
+#ifdef __LITTLE_ENDIAN__
+  return __builtin_altivec_vinsbvrx(__c, __a);
+#else
+  return __builtin_altivec_vinsbvlx(__c, __a);
+#endif
+}
+
+static __inline__ vector unsigned short __ATTRS_o_ai vec_insertl(
+    vector unsigned short __a, vector unsigned short __b, unsigned int __c) {
+#ifdef __LITTLE_ENDIAN__
+  return __builtin_altivec_vinshvrx(__c, __a);
+#else
+  return __builtin_altivec_vinshvlx(__c, __a);
+#endif
+}
+
+static __inline__ vector unsigned int __ATTRS_o_ai vec_insertl(
+    vector unsigned int __a, vector unsigned int __b, unsigned int __c) {
+#ifdef __LITTLE_ENDIAN__
+  return __builtin_altivec_vinswvrx(__c, __a);
+#else
+  return __builtin_altivec_vinswvlx(__c, __a);
+#endif
+}
+
+/* vec_inserth */
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_inserth(unsigned char __a, vector unsigned char __b, unsigned int __c) {
+#ifdef __LITTLE_ENDIAN__
+  return __builtin_altivec_vinsblx(__c, __a);
+#else
+  return __builtin_altivec_vinsbrx(__c, __a);
+#endif
+}
+
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_inserth(unsigned short __a, vector unsigned short __b, unsigned int __c) {
+#ifdef __LITTLE_ENDIAN__
+  return __builtin_altivec_vinshlx(__c, __a);
+#else
+  return __builtin_altivec_vinshrx(__c, __a);
+#endif
+}
+
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_inserth(unsigned int __a, vector unsigned int __b, unsigned int __c) {
+#ifdef __LITTLE_ENDIAN__
+  return __builtin_altivec_vinswlx(__c, __a);
+#else
+  return __builtin_altivec_vinswrx(__c, __a);
+#endif
+}
+
+static __inline__ vector unsigned long long __ATTRS_o_ai vec_inserth(
+    unsigned long long __a, vector unsigned long long __b, unsigned int __c) {
+#ifdef __LITTLE_ENDIAN__
+  return __builtin_altivec_vinsdlx(__c, __a);
+#else
+  return __builtin_altivec_vinsdrx(__c, __a);
+#endif
+}
+
+static __inline__ vector unsigned char __ATTRS_o_ai vec_inserth(
+    vector unsigned char __a, vector unsigned char __b, unsigned int __c) {
+#ifdef __LITTLE_ENDIAN__
+  return __builtin_altivec_vinsbvlx(__c, __a);
+#else
+  return __builtin_altivec_vinsbvrx(__c, __a);
+#endif
+}
+
+static __inline__ vector unsigned short __ATTRS_o_ai vec_inserth(
+    vector unsigned short __a, vector unsigned short __b, unsigned int __c) {
+#ifdef __LITTLE_ENDIAN__
+  return __builtin_altivec_vinshvlx(__c, __a);
+#else
+  return __builtin_altivec_vinshvrx(__c, __a);
+#endif
+}
+
+static __inline__ vector unsigned int __ATTRS_o_ai vec_inserth(
+    vector unsigned int __a, vector unsigned int __b, unsigned int __c) {
+#ifdef __LITTLE_ENDIAN__
+  return __builtin_altivec_vinswvlx(__c, __a);
+#else
+  return __builtin_altivec_vinswvrx(__c, __a);
+#endif
+}
+
+/* vec_replace */
+#ifdef __LITTLE_ENDIAN__
+#define vec_replace_elt(__a, __b, __c) \
+  _Generic( \
+      (__b), signed int \
+      : (vector signed int)__builtin_altivec_vinsw((signed int)(__b), \
+            (12 - ((__c)*4))), \
+      unsigned int \
+      : (vector unsigned int)__builtin_altivec_vinsw((unsigned int)(__b), \
+            (12 - ((__c)*4))), \
+      float \
+      : (vector float)__builtin_altivec_vinsw((float)(__b), (12 - ((__c)*4))), \
+      signed long long \
+      : (vector signed long long)__builtin_altivec_vinsd( \
+            (signed long long)(__b), (8 - ((__c)*8))), \
+      unsigned long long \
+      : (vector unsigned long long)__builtin_altivec_vinsd( \
+            (unsigned long long)(__b), (8 - ((__c)*8))), \
+      double \
+      : (vector double)__builtin_altivec_vinsd((double)(__b), \
+            (8 - ((__c)*8))))
+
+#define vec_replace_unaligned(__a, __b, __c) \
+  _Generic((__b), signed int \
+           : (vector unsigned char)__builtin_altivec_vinsw((signed int)(__b), \
+                 (12 - (__c))), \
+           unsigned int \
+           : (vector unsigned char)__builtin_altivec_vinsw( \
+                 (unsigned int)(__b), (12 - (__c))), \
+           float \
+           : (vector unsigned char)__builtin_altivec_vinsw((float)(__b), \
+                 (12 - (__c))), \
+           signed long long \
+           : (vector unsigned char)__builtin_altivec_vinsd( \
+                 (signed long long)(__b), (8 - (__c))), \
+           unsigned long long \
+           : (vector unsigned char)__builtin_altivec_vinsd( \
+                 (unsigned long long)(__b), (8 - (__c))), \
+           double \
+           : (vector unsigned char)__builtin_altivec_vinsd((double)(__b), \
+                 (8 - (__c))))
+
+#else
+#define vec_replace_elt(__a, __b, __c) \
+  _Generic((__b), signed int \
+           : (vector signed int)__builtin_altivec_vinsw((signed int)(__b), \
+                 ((__c)*4)), \
+           unsigned int \
+           : (vector unsigned int)__builtin_altivec_vinsw((unsigned int)(__b), \
+                 ((__c)*4)), \
+           float \
+           : (vector float)__builtin_altivec_vinsw((float)(__b), ((__c)*4)), \
+           signed long long \
+           : (vector signed long long)__builtin_altivec_vinsd( \
+                 (signed long long)(__b), ((__c)*8)), \
+           unsigned long long \
+           : (vector unsigned long long)__builtin_altivec_vinsd( \
+                 (unsigned long long)(__b), ((__c)*8)), \
+           double \
+           : (vector double)__builtin_altivec_vinsd((double)(__b), ((__c)*8)))
+
+#define vec_replace_unaligned(__a, __b, __c) \
+  _Generic( \
+      (__b), signed int \
+      : (vector unsigned char)__builtin_altivec_vinsw((signed int)(__b), \
+            (__c)), \
+      unsigned int \
+      : (vector unsigned char)__builtin_altivec_vinsw((unsigned int)(__b), \
+            (__c)), \
+      float \
+      : (vector unsigned char)__builtin_altivec_vinsw((float)(__b), (__c)), \
+      signed long long \
+      : (vector unsigned char)__builtin_altivec_vinsd((signed long long)(__b), \
+            (__c)), \
+      unsigned long long \
+      : (vector unsigned char)__builtin_altivec_vinsd( \
+            (unsigned long long)(__b), (__c)), \
+      double \
+      : (vector unsigned char)__builtin_altivec_vinsd((double)(__b), (__c)))
+#endif
+
+/* vec_sldbi */
+#define vec_sldb(__a, __b, __c) \
+  _Generic((__a), vector signed char \
+           : (vector signed char)__builtin_altivec_vsldbi( \
+                 (vector signed char)(__a), (__b), (__c)), \
+           vector unsigned char \
+           : (vector unsigned char)__builtin_altivec_vsldbi( \
+                 (vector unsigned char)(__a), (__b), (__c)), \
+           vector signed short \
+           : (vector signed short)__builtin_altivec_vsldbi( \
+                 (vector signed short)(__a), (__b), (__c)), \
+           vector unsigned short \
+           : (vector unsigned short)__builtin_altivec_vsldbi( \
+                 (vector unsigned short)(__a), (__b), (__c)), \
+           vector signed int \
+           : (vector signed int)__builtin_altivec_vsldbi( \
+                 (vector signed int)(__a), (__b), (__c)), \
+           vector unsigned int \
+           : (vector unsigned int)__builtin_altivec_vsldbi( \
+                 (vector unsigned int)(__a), (__b), (__c)), \
+           vector signed long long \
+           : (vector signed long long)__builtin_altivec_vsldbi( \
+                 (vector signed long long)(__a), (__b), (__c)), \
+           vector unsigned long long \
+           : (vector unsigned long long)__builtin_altivec_vsldbi( \
+                 (vector unsigned long long)(__a), (__b), (__c)))
+/* vec_srdbi */
+#define vec_srdb(__a, __b, __c) \
+  _Generic((__a), vector signed char \
+           : (vector signed char)__builtin_altivec_vsrdbi( \
+                 (vector signed char)(__a), (__b), (__c)), \
+           vector unsigned char \
+           : (vector unsigned char)__builtin_altivec_vsrdbi( \
+                 (vector unsigned char)(__a), (__b), (__c)), \
+           vector signed short \
+           : (vector signed short)__builtin_altivec_vsrdbi( \
+                 (vector signed short)(__a), (__b), (__c)), \
+           vector unsigned short \
+           : (vector unsigned short)__builtin_altivec_vsrdbi( \
+                 (vector unsigned short)(__a), (__b), (__c)), \
+           vector signed int \
+           : (vector signed int)__builtin_altivec_vsrdbi( \
+                 (vector signed int)(__a), (__b), (__c)), \
+           vector unsigned int \
+           : (vector unsigned int)__builtin_altivec_vsrdbi( \
+                 (vector unsigned int)(__a), (__b), (__c)), \
+           vector signed long long \
+           : (vector signed long long)__builtin_altivec_vsrdbi( \
+                 (vector signed long long)(__a), (__b), (__c)), \
+           vector unsigned long long \
+           : (vector unsigned long long)__builtin_altivec_vsrdbi( \
+                 (vector unsigned long long)(__a), (__b), (__c)))
+
+#ifdef __VSX__
+/* vec_splati */
+#define vec_splati(__a) \
+  _Generic((__a), signed int \
+           : ((vector signed int)(__a)), unsigned int \
+           : ((vector unsigned int)(__a)), float \
+           : ((vector float)(__a)))
+
+/* vec_splatid */
+static __inline__ vector double __ATTRS_o_ai vec_splatid(const float __a) {
+  return ((vector double)((double)__a));
+}
+
+/* vec_splati_ins */
+#ifdef __LITTLE_ENDIAN__
+static __inline__ vector signed int __ATTRS_o_ai vec_splati_ins(
+    vector signed int __a, const unsigned int __b, const signed int __c) {
+  assert((__b == 0 || __b == 1) && "The second argument must be 0 or 1");
+  __a[1 - __b] = __c;
+  __a[2 + 1 - __b] = __c;
+  return __a;
+}
+
+static __inline__ vector unsigned int __ATTRS_o_ai vec_splati_ins(
+    vector unsigned int __a, const unsigned int __b, const unsigned int __c) {
+  assert((__b == 0 || __b == 1) && "The second argument must be 0 or 1");
+  __a[1 - __b] = __c;
+  __a[2 + 1 - __b] = __c;
+  return __a;
+}
+
+static __inline__ vector float __ATTRS_o_ai
+vec_splati_ins(vector float __a, const unsigned int __b, const float __c) {
+  assert((__b == 0 || __b == 1) && "The second argument must be 0 or 1");
+  __a[1 - __b] = __c;
+  __a[2 + 1 - __b] = __c;
+  return __a;
+}
+#else
+static __inline__ vector signed int __ATTRS_o_ai vec_splati_ins(
+    vector signed int __a, const unsigned int __b, const signed int __c) {
+  assert((__b == 0 || __b == 1) && "The second argument must be 0 or 1");
+  __a[__b] = __c;
+  __a[2 + __b] = __c;
+  return __a;
+}
+
+static __inline__ vector unsigned int __ATTRS_o_ai vec_splati_ins(
+    vector unsigned int __a, const unsigned int __b, const unsigned int __c) {
+  assert((__b == 0 || __b == 1) && "The second argument must be 0 or 1");
+  __a[__b] = __c;
+  __a[2 + __b] = __c;
+  return __a;
+}
+
+static __inline__ vector float __ATTRS_o_ai
+vec_splati_ins(vector float __a, const unsigned int __b, const float __c) {
+  assert((__b == 0 || __b == 1) && "The second argument must be 0 or 1");
+  __a[__b] = __c;
+  __a[2 + __b] = __c;
+  return __a;
+}
+#endif
+
+/* vec_permx */
+#define vec_permx(__a, __b, __c, __d) \
+  _Generic((__a), vector signed char \
+           : (vector signed char)__builtin_vsx_xxpermx( \
+                 (vector signed char)(__a), (__b), (__c), (__d)), \
+           vector unsigned char \
+           : (vector unsigned char)__builtin_vsx_xxpermx( \
+                 (vector unsigned char)(__a), (__b), (__c), (__d)), \
+           vector signed short \
+           : (vector signed short)__builtin_vsx_xxpermx( \
+                 (vector signed short)(__a), (__b), (__c), (__d)), \
+           vector unsigned short \
+           : (vector unsigned short)__builtin_vsx_xxpermx( \
+                 (vector unsigned short)(__a), (__b), (__c), (__d)), \
+           vector signed int \
+           : (vector signed int)__builtin_vsx_xxpermx( \
+                 (vector signed int)(__a), (__b), (__c), (__d)), \
+           vector unsigned int \
+           : (vector unsigned int)__builtin_vsx_xxpermx( \
+                 (vector unsigned int)(__a), (__b), (__c), (__d)), \
+           vector signed long long \
+           : (vector signed long long)__builtin_vsx_xxpermx( \
+                 (vector signed long long)(__a), (__b), (__c), (__d)), \
+           vector unsigned long long \
+           : (vector unsigned long long)__builtin_vsx_xxpermx( \
+                 (vector unsigned long long)(__a), (__b), (__c), (__d)), \
+           vector float \
+           : (vector float)__builtin_vsx_xxpermx((vector float)(__a), (__b), \
+                 (__c), (__d)), \
+           vector double \
+           : (vector double)__builtin_vsx_xxpermx((vector double)(__a), (__b), \
+                 (__c), (__d)))
+
+/* vec_blendv */
+static __inline__ vector signed char __ATTRS_o_ai vec_blendv(
+    vector signed char __a, vector signed char __b, vector unsigned char __c) {
+  return __builtin_vsx_xxblendvb(__a, __b, __c);
+}
+
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_blendv(vector unsigned char __a, vector unsigned char __b,
+           vector unsigned char __c) {
+  return __builtin_vsx_xxblendvb(__a, __b, __c);
+}
+
+static __inline__ vector signed short __ATTRS_o_ai
+vec_blendv(vector signed short __a, vector signed short __b,
+           vector unsigned short __c) {
+  return __builtin_vsx_xxblendvh(__a, __b, __c);
+}
+
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_blendv(vector unsigned short __a, vector unsigned short __b,
+           vector unsigned short __c) {
+  return __builtin_vsx_xxblendvh(__a, __b, __c);
+}
+
+static __inline__ vector signed int __ATTRS_o_ai vec_blendv(
+    vector signed int __a, vector signed int __b, vector unsigned int __c) {
+  return __builtin_vsx_xxblendvw(__a, __b, __c);
+}
+
+static __inline__ vector unsigned int __ATTRS_o_ai vec_blendv(
+    vector unsigned int __a, vector unsigned int __b, vector unsigned int __c) {
+  return __builtin_vsx_xxblendvw(__a, __b, __c);
+}
+
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_blendv(vector signed long long __a, vector signed long long __b,
+           vector unsigned long long __c) {
+  return __builtin_vsx_xxblendvd(__a, __b, __c);
+}
+
+static __inline__ vector unsigned long long __ATTRS_o_ai
+vec_blendv(vector unsigned long long __a, vector unsigned long long __b,
+           vector unsigned long long __c) {
+  return __builtin_vsx_xxblendvd(__a, __b, __c);
+}
+
+static __inline__ vector float __ATTRS_o_ai
+vec_blendv(vector float __a, vector float __b, vector unsigned int __c) {
+  return __builtin_vsx_xxblendvw(__a, __b, __c);
+}
+
+static __inline__ vector double __ATTRS_o_ai vec_blendv(
+    vector double __a, vector double __b, vector unsigned long long __c) {
+  return __builtin_vsx_xxblendvd(__a, __b, __c);
+}
+#endif /* __VSX__ */
+#endif /* __POWER10_VECTOR__ */
+
 #undef __ATTRS_o_ai
 
 #endif /* __ALTIVEC_H */
Index: clang/include/clang/Basic/BuiltinsPPC.def
===================================================================
--- clang/include/clang/Basic/BuiltinsPPC.def
+++ clang/include/clang/Basic/BuiltinsPPC.def
@@ -298,6 +298,28 @@
 BUILTIN(__builtin_altivec_vrlwnm, "V4UiV4UiV4Ui", "")
 BUILTIN(__builtin_altivec_vrldnm, "V2ULLiV2ULLiV2ULLi", "")
 
+// P10 Vector insert built-ins
+BUILTIN(__builtin_altivec_vinsblx, "V16UcULLiULLi", "")
+BUILTIN(__builtin_altivec_vinsbrx, "V16UcULLiULLi", "")
+BUILTIN(__builtin_altivec_vinshlx, "V8UsULLiULLi", "")
+BUILTIN(__builtin_altivec_vinshrx, "V8UsULLiULLi", "")
+BUILTIN(__builtin_altivec_vinswlx, "V4UiULLiULLi", "")
+BUILTIN(__builtin_altivec_vinswrx, "V4UiULLiULLi", "")
+BUILTIN(__builtin_altivec_vinsdlx, "V2ULLiULLiULLi", "")
+BUILTIN(__builtin_altivec_vinsdrx, "V2ULLiULLiULLi", "")
+BUILTIN(__builtin_altivec_vinsbvlx, "V16UcULLiV16Uc", "")
+BUILTIN(__builtin_altivec_vinsbvrx, "V16UcULLiV16Uc", "")
+BUILTIN(__builtin_altivec_vinshvlx, "V8UsULLiV8Us", "")
+BUILTIN(__builtin_altivec_vinshvrx, "V8UsULLiV8Us", "")
+BUILTIN(__builtin_altivec_vinswvlx, "V4UiULLiV4Ui", "")
+BUILTIN(__builtin_altivec_vinswvrx, "V4UiULLiV4Ui", "")
+BUILTIN(__builtin_altivec_vinsw, "V4UiULLiIi", "")
+BUILTIN(__builtin_altivec_vinsd, "V2ULLiULLiIi", "")
+
+// P10 Vector shift built-ins
+BUILTIN(__builtin_altivec_vsldbi, "V16UcV16UcV16UcIi", "")
+BUILTIN(__builtin_altivec_vsrdbi, "V16UcV16UcV16UcIi", "")
+
 // VSX built-ins.
 BUILTIN(__builtin_vsx_lxvd2x, "V2divC*", "")
 
@@ -422,6 +444,15 @@
 BUILTIN(__builtin_vsx_xxpermdi, "v.", "t")
 BUILTIN(__builtin_vsx_xxsldwi, "v.", "t")
 
+// P10 Vector blend built-ins
+BUILTIN(__builtin_vsx_xxblendvb, "V16UcV16UcV16UcV16Uc", "")
+BUILTIN(__builtin_vsx_xxblendvh, "V8UsV8UsV8UsV8Us", "")
+BUILTIN(__builtin_vsx_xxblendvw, "V4UiV4UiV4UiV4Ui", "")
+BUILTIN(__builtin_vsx_xxblendvd, "V2ULLiV2ULLiV2ULLiV2ULLi", "")
+
+// P10 Vector permute extended built-in
+BUILTIN(__builtin_vsx_xxpermx, "V16UcV16UcV16UcV16UcIi", "")
+
 // Float 128 built-ins
 BUILTIN(__builtin_sqrtf128_round_to_odd, "LLdLLd", "")
 BUILTIN(__builtin_addf128_round_to_odd, "LLdLLdLLd", "")
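For reviewers who want to see the user-facing side of the patch, here is a minimal usage sketch (not part of the patch itself). It assumes a Power10 target (e.g. `clang -mcpu=pwr10`); the function `demo` and its variable names are purely illustrative, and the semantics follow the prototypes listed in the summary:

```c
#include <altivec.h>

// Illustrative only: exercises a few of the new Power10 interfaces.
vector unsigned int demo(vector unsigned int a, vector unsigned int b,
                         vector unsigned int mask) {
  // vec_sldb: shift the 32-byte concatenation of a and b left by 3 bits
  // and keep the high 16 bytes (the immediate must be in 0-7).
  vector unsigned int shifted = vec_sldb(a, b, 3);
  // vec_replace_elt: replace word element 1 with the scalar 42
  // (the element index must be a compile-time constant).
  vector unsigned int replaced = vec_replace_elt(shifted, 42U, 1);
  // vec_blendv: per-element select between replaced and b, controlled by
  // the high bit of each element of mask.
  return vec_blendv(replaced, b, mask);
}
```

Note that the endianness handling stays inside the header: the LE/BE variants of the underlying instructions (e.g. vinswlx vs. vinswrx) are selected under `__LITTLE_ENDIAN__`, so user code like the sketch above is endian-agnostic.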