================ @@ -0,0 +1,2177 @@ +//===-------------------- HexagonXQFloatGenerator.cpp --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass enables generation of XQFloat instructions. XQF instructions +// are more efficient, but can be less precise in comparison to IEEE ones. +// Based on the accuracy preservation of the generated code, we enabled four +// modes - Strict IEEE-754 compliant, IEEE-754 compliant, Lossy subnormals and +// legacy mode. +// +// Strict IEEE mode adheres to similar accuracy and precision as of IEEE-754. +// +// IEEE-754 compliant mode excludes IEEE-754 overflows and lower precision +// subnormals due to larger dynamic range than IEEE-754. +// All subnormals have extra precision. +// +// Lossy subnormals mode without normalization results in a loss of accuracy. +// This provides greater precision than a clamp of subnormals to 0. +// If dataset excludes subnormals, it behaves as IEEE-754 compliant mode. +// +// The direct mode has a loss of 1 bit of accuracy compared to IEEE-754. +// +// V79 replaces the prior internal HVX floating point format for floating-point +// arithmetic. The new internal HVX floating-point format yields results +// identical to IEEE-754 round-to-even mode. The new format contains more bits +// than IEEE-754, which optionally produces results with greater range and +// accuracy. Only the HVX vector registers use the HVX floating-point format. +// Memory maintains all floating-point data in IEEE-754 format, +// and all loads/stores use the IEEE-754 format. A subset of HVX floating-point +// operations transform IEEE-754 floating-point data to HVX floating-point data. 
+// Subsequent HVX floating-point instructions may consume operands in the HVX +// floating-point without conversion to IEEE-754, which allows for performant +// & energy efficient code. The program does not need to switch between formats +// continuously. The program must convert the HVX floating-point results to +// IEEE-754 prior to storing to memory. + +// HVX floating-point achieves IEEE-754 compliance through normalization. +// The program may skip normalization when faster calculation is desired, and +// IEEE-754 compliance isn’t required. HVX floating-point contains two input +// types: qf32, single precision floating point, and qf16, half precision +// floating point. In Hexagon, IEEE-754 contains two input types: sf, single +// precision floating point, and hf, half precision floating point. +// +// Only HVX floating-point source and destination instructions use HVX +// floating-point values. Instructions specify the HVX floating-point format +// with the qf16 and qf32 identifier. A source vector register will drop the +// extended state of a HVX floating-point value when an instruction reads the +// source vector register without the qf16 or qf32 identifier. A destination +// vector register will reset its extended state when an instruction writes to +// a vector register without the qf16 or qf32 identifier. When dropping the +// extended state, the floating-point value loses accuracy. The program may +// preserve the floating-point value by converting HVX floating-point values +// to IEEE-754 values. Compiler must convert HVX floating-point values to +// IEEE-754 values before using as an input to stores, permutes, shifts, and +// any other operations that do not source the HVX floating-point format. +// +// Depending on the desired results, HVX floating-point operations may have +// some requirements on the input sources. 
The HVX floating-point values +// require normalization to achieve IEEE-754 compliance, while faster operations +// may skip normalization. The program normalizes HVX floating-point values +// before subsequent HVX floating-point operations, so the floating-point value +// does not lose precision. The program also obtains results identical to +// IEEE-754 by converting all HVX floating-point results to IEEE-754 format +// before consumed in any subsequent operation. There are however cases where +// this conversion is redundant, or the differences between IEEE-754 and HVX +// floating-point may not be a concern. +// +// The conversion logic can be understood by the table below: +//
+// ============================================================================================================================================================
+//            | Inputs to add/subtract            | Inputs to multiplication instructions                                 | Non-HVX floating point            |
+//            | instructions                      |                                                                       | instruction                       |
+// ============================================================================================================================================================
+// Sources    | IEEE-     | HVX       | HVX       | sf        | qf32      | qf32      | hf        | qf16      | qf16      | IEEE-754  | HVX       | HVX       |
+//            | 754       | floating  | floating  |           | from      | from      |           | from      | from      |           | floating  | floating  |
+//            |           | point     | point     |           | mult      | adder     |           | mult      | adder     |           | point     | point     |
+//            |           | from      | from      |           |           |           |           |           |           |           | from      | from      |
+//            |           | mult      | adder     |           |           |           |           |           |           |           | mult      | adder     |
+// ============================================================================================================================================================
+// Strict     | Direct    | Convert   | Convert   | Normalize | Convert   | Convert   | widening  | Convert   | Convert   | Direct    | Convert   | Convert   |
+// IEEE-754   | Use       | to        | to        |           | to IEEE   | to IEEE   | multiply  | to IEEE,  | to IEEE,  | use       | to        | to        |
+// compliance |           | IEEE      | IEEE      |           | then      | then      | then      | widening  | widening  |           | IEEE      | IEEE      |
+//            |           |           |           |           | normalize | normalize | convert   | multiply, | multiply, |           |           |           |
+//            |           |           |           |           |           |           | to IEEE   | convert   | convert   |           |           |           |
+//            |           |           |           |           |           |           |           | to IEEE   | to IEEE   |           |           |           |
+// ------------------------------------------------------------------------------------------------------------------------------------------------------------
+// IEEE-754   | Direct    | Direct    | Direct    | Normalize | Direct    | Normalize | Widening  | Direct    | Widening  | Direct    | Convert   | Convert   |
+// compliance | Use       | Use       | Use       |           | use       |           | multiply  | use       | multiply  | use       | to IEEE   | to IEEE   |
+// ------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Lossy      | Direct    | Direct    | Direct    | Direct    | Direct    | Normalize | Direct    | Direct    | Widening  | Direct    | Convert   | Convert   |
+// Subnormals | Use       | Use       | Use       | Use       | use       |           | use       | use       | multiply  | use       | to IEEE   | to IEEE   |
+// ------------------------------------------------------------------------------------------------------------------------------------------------------------
+// Direct     | Direct    | Direct    | Direct    | Direct    | Direct    | Direct    | Direct    | Direct    | Direct    | Direct    | Direct    | Direct    |
+// Lossy      | Use       | Use       | Use       | Use       | use       | use       | use       | use       | use       | use       | use       | use       |
+// ------------------------------------------------------------------------------------------------------------------------------------------------------------
+// +// For v81, the normalization sequence changes. Instead of multiplying 0 +// and -0, a simple copy operation normalizes the unnormal value. Both +// qf and IEEE-754 value can be unnormal. +// Additionally for v81, we have two new vsub instructions which are handled. 
+ +#define HEXAGON_XQFLOAT_GENERATOR "XQFloat Generator pass" + +#include "Hexagon.h" +#include "HexagonInstrInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "vector" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "hexagon-xqf-gen" + +using namespace llvm; + +extern cl::opt<QFloatMode> QFloatModeValue; + +// Master flag to enable XQF generations +cl::opt<bool> EnableHVXXQFloat("enable-xqf-gen", cl::init(false), + cl::desc("Enable XQFloat generations")); +// Master flag to remove extraneous qf to sf/hf conversions +cl::opt<bool> + EnableConversionsRemoval("enable-rem-conv", cl::init(false), + cl::desc("Enable extraneous conversions removal")); + +// Diagnostic flags +cl::opt<bool> PrintDebug("debug-print", cl::init(false), + cl::desc("Print function mir after transformation")); +cl::opt<bool> + EnableConvDiag("enable-diag-conv", cl::init(false), + cl::desc("Print function after conversion removal.")); + +// This vector contains the opcodes which generate qf32 from add/subtract +SmallVector<unsigned short, 7> XQFPAdd32 = { + // vector add instructions + Hexagon::V6_vadd_sf, Hexagon::V6_vadd_qf32, Hexagon::V6_vadd_qf32_mix, + + // vector subtract instructions + Hexagon::V6_vsub_qf32, Hexagon::V6_vsub_qf32_mix, Hexagon::V6_vsub_sf, + Hexagon::V6_vsub_sf_mix}; + +// This vector contains the opcodes which generate qf16 from add/subtract +SmallVector<unsigned short, 7> XQFPAdd16 = { + // vector add instructions + Hexagon::V6_vadd_hf, 
Hexagon::V6_vadd_qf16, Hexagon::V6_vadd_qf16_mix, + + // vector subtract intrutions + Hexagon::V6_vsub_hf, Hexagon::V6_vsub_qf16, Hexagon::V6_vsub_qf16_mix, + Hexagon::V6_vsub_hf_mix}; + +// This vector contains the opcodes which generate qf32 from multiplication +SmallVector<unsigned short, 5> XQFPMult32 = { + Hexagon::V6_vmpy_qf32, Hexagon::V6_vmpy_qf32_qf16, Hexagon::V6_vmpy_qf32_hf, + Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32_mix_hf}; +// This vector contains the opcodes which generate qf16 from multiplication +SmallVector<unsigned short, 3> XQFPMult16 = {Hexagon::V6_vmpy_qf16, + Hexagon::V6_vmpy_qf16_hf, + Hexagon::V6_vmpy_qf16_mix_hf}; + +namespace llvm { +FunctionPass *createHexagonXQFloatGenerator(); +void initializeHexagonXQFloatGeneratorPass(PassRegistry &); +} // namespace llvm + +namespace { + +struct HexagonXQFloatGenerator : public MachineFunctionPass { +public: + static char ID; + HexagonXQFloatGenerator() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &MF) override; + + StringRef getPassName() const override { return HEXAGON_XQFLOAT_GENERATOR; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + MachineFunctionPass::getAnalysisUsage(AU); + } + +private: + // Handle each XQF optimization level + bool HandleStrictIEEE(MachineFunction &); + bool HandleCompliantIEEE(MachineFunction &); + bool HandleLossySubnormals(MachineFunction &); + bool HandleLossyLegacy(MachineFunction &); + + // Checkers functions for input operands + bool checkIfInputFromAdder32(Register Reg); + bool checkIfInputFromAdder16(Register Reg); + bool checkIfInputFromMult32(Register Reg); + bool checkIfInputFromMult16(Register Reg); + bool deleteList(); + + // Helper functions for conversion/normalization/widening + bool widenMultiplicationInputF16(MachineInstr &, Register &, Register &, + Register &, bool); + bool widenMultiplicationInputF16Rt(MachineInstr &, Register &, Register &, + Register &); + void widenMultiplyInputHF(MachineInstr 
&, Register &, Register &, Register &); + bool normalizeMultiplicationInputF32(MachineInstr &, Register &, Register &, + Register &, Register &, bool &); + void normalizeMultiplicationInputSF(MachineInstr &, Register &, Register &, + Register &, Register &, bool &); + bool convertNormalizeMultOp32(MachineInstr &, Register &, Register &, + Register &, Register &, bool &); + bool convertWidenMultOp16(MachineInstr &, Register &, Register &, Register &, + bool); + bool convertWidenMultOp32(MachineInstr &, Register &, Register &, Register &, + bool); + void createPrologInstructions(MachineInstr &, Register &); + bool convertAddOpToIEEE16(MachineInstr &, Register &, Register &, Register &, + bool, bool, bool); + bool convertAddOpToIEEE32(MachineInstr &, Register &, Register &, Register &, + bool, bool, bool); + void generateQF16FromQF32(MachineInstr &, Register &, Register &); + bool convertIfInputToNonHVX(MachineInstr &, bool); + void createConvertInstr(MachineInstr *, Register &, Register &, bool); + + // V81 specific normalization function + bool V81normalizeMultF32(MachineInstr &, Register &, Register &, Register &, + bool, bool, bool); + + const HexagonSubtarget *HST = nullptr; + const HexagonInstrInfo *HII = nullptr; + MachineRegisterInfo *MRI = nullptr; + + SmallVector<MachineInstr *, 16> + OriginalMI; // Hold the instructions to be deleted +}; + +// Print machine function +static void debug_print([[maybe_unused]] MachineFunction &MF) { + dbgs() << "\n=== Printing function ===\n"; +#ifndef NDEBUG + for (MachineBasicBlock &MBB : MF) + MBB.dump(); +#endif // NDEBUG +} + +// This class removes redundant vector convert instructions from qf to hf/sf. +// Additionally, it relaces use of sf/hf registers with qf types. +// The resulting code is complete without dangling instructions. +// FIXME: Liveness is not preserved. 
+class VectorConvertRemove { + +public: + VectorConvertRemove(MachineFunction &_MF, MachineRegisterInfo *_MRI, + const HexagonSubtarget *_HST) + : MF(_MF), MRI(_MRI), HST(_HST) { + HII = HST->getInstrInfo(); + } + + void run(); + +private: + MachineFunction &MF; + MachineRegisterInfo *MRI; + const HexagonSubtarget *HST; + const HexagonInstrInfo *HII; + + enum Operation { Add16, Add32, Sub16, Sub32, Mul16, Mul32 }; + // Helper functions + void handle_addsub_sf_sf(MachineInstr &, Register &, Register &, Register &, + bool); + void handle_addsub_qf_sf(MachineInstr &, Register &, Register &, Register &, + bool); + void handle_addsubmul_hf_hf(MachineInstr &, Register &, Register &, + Register &, Operation); + void handle_addsubmul_qf_hf(MachineInstr &, Register &, Register &, + Register &, Operation); + void handle_qf32_mul_sf_sf(MachineInstr &, Register &, Register &, + Register &); + void handle_qf16_mul_hf_hf(MachineInstr &, Register &, Register &, + Register &); + bool checkHVXUses32(MachineInstr *, MachineInstr *); + bool checkHVXUses16(MachineInstr *, MachineInstr *); + unsigned getOperation(Operation, bool, bool); + + // List which holds conversion instructions + SmallPtrSet<MachineInstr *, 16> ConvInstrList; + // List which holds qf handling instructions + std::vector<MachineInstr *> SfHfInstrList; +}; + +// both : both operands are replaced +unsigned VectorConvertRemove::getOperation(Operation Op, bool firstOpQf, + bool secOpQf) { + if (firstOpQf && secOpQf) { + switch (Op) { + case Add16: + return Hexagon::V6_vadd_qf16; + case Add32: + return Hexagon::V6_vadd_qf32; + case Sub16: + return Hexagon::V6_vsub_qf16; + case Sub32: + return Hexagon::V6_vsub_qf32; + case Mul16: + return Hexagon::V6_vmpy_qf16; + case Mul32: + return Hexagon::V6_vmpy_qf32_qf16; + } + } else if (firstOpQf) { + switch (Op) { + case Add16: + return Hexagon::V6_vadd_qf16_mix; + case Add32: + return Hexagon::V6_vadd_qf32_mix; + case Sub16: + return Hexagon::V6_vsub_qf16_mix; + case Sub32: + 
return Hexagon::V6_vsub_qf32_mix; + case Mul16: + return Hexagon::V6_vmpy_qf16_mix_hf; + case Mul32: + return Hexagon::V6_vmpy_qf32_mix_hf; + } + } else if (secOpQf) { + switch (Op) { + case Sub16: + return Hexagon::V6_vsub_hf_mix; + case Sub32: + return Hexagon::V6_vsub_sf_mix; + default: + break; + } + } else { + } + llvm_unreachable("Unknown opcode and operand combination!"); +} + +// Return false if there are multiple instructions where the qf32 is used +// other than the instruction for which it is called +bool VectorConvertRemove::checkHVXUses32(MachineInstr *MI, + MachineInstr *UseMI) { + Register convReg = MI->getOperand(0).getReg(); + // Iterate over all uses of the Def we are analyzing + for (auto &MO : make_range(MRI->use_begin(convReg), MRI->use_end())) { + MachineInstr *UMI = MO.getParent(); + if (UMI == UseMI) + continue; + // Since the convert cannot be deleted, we set the operand as NOT kill + MI->getOperand(1).setIsKill(false); + return false; + } + return true; +} + +// Return false if there are multiple instructions where the qf16 is used +// other than the instruction for which it is called +bool VectorConvertRemove::checkHVXUses16(MachineInstr *MI, + MachineInstr *UseMI) { + Register convReg = MI->getOperand(0).getReg(); + // Iterate over all uses of the Def we are analyzing + for (auto &MO : make_range(MRI->use_begin(convReg), MRI->use_end())) { + MachineInstr *UMI = MO.getParent(); + if (UMI == UseMI) + continue; + // Since the convert cannot be deleted, we set the operand as NOT kill + MI->getOperand(1).setIsKill(false); + return false; + } + return true; +} + +// Removes converts feeding to op(sf,sf), and replaces its sf operands with qf +void VectorConvertRemove::handle_addsub_sf_sf(MachineInstr &MI, Register &Reg1, + Register &Reg2, Register &Dest, + bool isAdd) { + + MachineBasicBlock &MBB = *MI.getParent(); + const DebugLoc &DL = MI.getDebugLoc(); + + bool firstConv = false, secConv = false; + bool DefOp1_del = false, DefOp2_del = 
false; + Register Src1, Src2; + + MachineInstr *DefOp1 = MRI->getVRegDef(Reg1); + MachineInstr *DefOp2 = MRI->getVRegDef(Reg2); + // check if the first operand is from a convert operation + if (DefOp1->getOpcode() == Hexagon::V6_vconv_sf_qf32) { + if (checkHVXUses32(DefOp1, &MI)) + DefOp1_del = true; + Src1 = DefOp1->getOperand(1).getReg(); + firstConv = true; + } + + // check if the second operand is from a convert operation + if (DefOp2->getOpcode() == Hexagon::V6_vconv_sf_qf32) { + if (checkHVXUses32(DefOp2, &MI)) + DefOp2_del = true; + Src2 = DefOp2->getOperand(1).getReg(); + secConv = true; + } + + if (firstConv && secConv) { + BuildMI(MBB, MI, DL, + HII->get(getOperation(isAdd ? Operation::Add32 : Operation::Sub32, + true, true)), + Dest) + .addReg(Src1) + .addReg(Src2); + SfHfInstrList.push_back(&MI); + } else if (firstConv) { + BuildMI(MBB, MI, DL, + HII->get(getOperation(isAdd ? Operation::Add32 : Operation::Sub32, + true, false)), + Dest) + .addReg(Src1) + .addReg(Reg2); + SfHfInstrList.push_back(&MI); + } else if (secConv) { + // For v79, there is no provision for 2nd op being qf for add/sub + if (HST->useHVXV81Ops()) { + if (isAdd) + BuildMI(MBB, MI, DL, HII->get(Hexagon::V6_vadd_qf32_mix), Dest) + .addReg(Src2) + .addReg(Reg1); + else + BuildMI(MBB, MI, DL, HII->get(Hexagon::V6_vsub_sf_mix), Dest) + .addReg(Reg1) + .addReg(Src2); + SfHfInstrList.push_back(&MI); + // For v79, there is no provision for 2nd op being qf for add/sub. Since + // add is commutative, the ops can be rotated. 
+ } else if (HST->useHVXV79Ops()) { + // for vadd we interchange the ops, for vsub we ignore + if (isAdd) { + BuildMI(MBB, MI, DL, HII->get(Hexagon::V6_vadd_qf32_mix), Dest) + .addReg(Src2) + .addReg(Reg1); + SfHfInstrList.push_back(&MI); + } else // don't delete the convert instruction for vsub + DefOp2_del = false; + } + } else { // none of the operands are from convert instructions + } + + if (DefOp1_del) + ConvInstrList.insert(DefOp1); + if (DefOp2_del) + ConvInstrList.insert(DefOp2); +} + +// Removes converts feeding to op(hf,hf), and replaces its hf operands with qf +void VectorConvertRemove::handle_addsubmul_hf_hf(MachineInstr &MI, + Register &Reg1, Register &Reg2, + Register &Dest, Operation Op) { + + MachineBasicBlock &MBB = *MI.getParent(); + const DebugLoc &DL = MI.getDebugLoc(); + + bool firstConv = false, secConv = false; + bool DefOp1_del = false, DefOp2_del = false; + bool isSub = Op == Operation::Sub16; + Register Src1, Src2; + + MachineInstr *DefOp1 = MRI->getVRegDef(Reg1); + MachineInstr *DefOp2 = MRI->getVRegDef(Reg2); + // check if the first operand is from a convert operation + if (DefOp1->getOpcode() == Hexagon::V6_vconv_hf_qf16) { + if (checkHVXUses16(DefOp1, &MI)) + DefOp1_del = true; + Src1 = DefOp1->getOperand(1).getReg(); + firstConv = true; + } + + // check if the second operand is from a convert operation + if (DefOp2->getOpcode() == Hexagon::V6_vconv_hf_qf16) { + if (checkHVXUses16(DefOp2, &MI)) + DefOp2_del = true; + Src2 = DefOp2->getOperand(1).getReg(); + secConv = true; + } + + if (firstConv && secConv) { + BuildMI(MBB, MI, DL, HII->get(getOperation(Op, true, true)), Dest) + .addReg(Src1) + .addReg(Src2); + SfHfInstrList.push_back(&MI); + } else if (firstConv) { + BuildMI(MBB, MI, DL, HII->get(getOperation(Op, true, false)), Dest) + .addReg(Src1) + .addReg(Reg2); + SfHfInstrList.push_back(&MI); + } else if (secConv) { + // For v81, we interchange the ops for vadd/vmul + // for vsub we use qf as second operand + if 
(HST->useHVXV81Ops()) { + if (!isSub) + BuildMI(MBB, MI, DL, HII->get(getOperation(Op, true, false)), Dest) + .addReg(Src2) + .addReg(Reg1); + else + BuildMI(MBB, MI, DL, HII->get(getOperation(Op, false, true)), Dest) + .addReg(Reg1) + .addReg(Src2); + SfHfInstrList.push_back(&MI); + } else if (HST->useHVXV79Ops()) { + // for vadd/vmul we interchange the ops, for vsub we ignore + if (!isSub) { + BuildMI(MBB, MI, DL, HII->get(getOperation(Op, true, false)), Dest) + .addReg(Src2) + .addReg(Reg1); + SfHfInstrList.push_back(&MI); + } else // don't delete the convert instruction for vsub + DefOp2_del = false; + } + } else { // none of the operands are from convert instructions + } + + if (DefOp1_del) + ConvInstrList.insert(DefOp1); + if (DefOp2_del) + ConvInstrList.insert(DefOp2); +} + +// Removes converts feeding to op(qf,sf), and replaces its sf operands with qf +void VectorConvertRemove::handle_addsub_qf_sf(MachineInstr &MI, Register &Reg1, + Register &Reg2, Register &Dest, + bool isAdd) { + MachineBasicBlock &MBB = *MI.getParent(); + const DebugLoc &DL = MI.getDebugLoc(); + Register Src; + bool conv = false; + + MachineInstr *DefOp = MRI->getVRegDef(Reg2); + // check if the second operand is from a convert operation + if (DefOp->getOpcode() == Hexagon::V6_vconv_sf_qf32) { + if (checkHVXUses32(DefOp, &MI)) + ConvInstrList.insert(DefOp); + Src = DefOp->getOperand(1).getReg(); + conv = true; + } + + if (conv) { + BuildMI(MBB, MI, DL, + HII->get(isAdd ? 
Hexagon::V6_vadd_qf32 : Hexagon::V6_vsub_qf32), + Dest) + .addReg(Reg1) + .addReg(Src); + SfHfInstrList.push_back(&MI); + } +} + +// Removes converts feeding to op(qf,hf), and replaces its hf operands with qf +void VectorConvertRemove::handle_addsubmul_qf_hf(MachineInstr &MI, + Register &Reg1, Register &Reg2, + Register &Dest, Operation Op) { + MachineBasicBlock &MBB = *MI.getParent(); + const DebugLoc &DL = MI.getDebugLoc(); + Register Src; + bool conv = false; + + MachineInstr *DefOp = MRI->getVRegDef(Reg2); + // check if the second operand is from a convert operation + if (DefOp->getOpcode() == Hexagon::V6_vconv_hf_qf16) { + if (checkHVXUses16(DefOp, &MI)) + ConvInstrList.insert(DefOp); + Src = DefOp->getOperand(1).getReg(); + conv = true; + } + + if (conv) { + BuildMI(MBB, MI, DL, HII->get(getOperation(Op, true, true)), Dest) + .addReg(Reg1) + .addReg(Src); + SfHfInstrList.push_back(&MI); + } +} + +// Removes converts feeding to op(sf,sf), and replaces its sf operands with qf +void VectorConvertRemove::handle_qf32_mul_sf_sf(MachineInstr &MI, + Register &Reg1, Register &Reg2, + Register &Dest) { + MachineBasicBlock &MBB = *MI.getParent(); + const DebugLoc &DL = MI.getDebugLoc(); + Register Src1, Src2; + bool firstConv = false, secConv = false; + + MachineInstr *DefOp1 = MRI->getVRegDef(Reg1); + MachineInstr *DefOp2 = MRI->getVRegDef(Reg2); + + if (DefOp1->getOpcode() == Hexagon::V6_vconv_sf_qf32 && + DefOp2->getOpcode() == Hexagon::V6_vconv_sf_qf32) { + // If yes, we can remove the convert + if (checkHVXUses32(DefOp1, &MI) && checkHVXUses32(DefOp2, &MI)) { + ConvInstrList.insert(DefOp1); + ConvInstrList.insert(DefOp2); + } + Src1 = DefOp1->getOperand(1).getReg(); + Src2 = DefOp2->getOperand(1).getReg(); + firstConv = true; + secConv = true; + } + + // If both are true, then only replace with qf32 = vmpy(qf32, qf32) + if (firstConv && secConv) { + BuildMI(MBB, MI, DL, HII->get(Hexagon::V6_vmpy_qf32), Dest) + .addReg(Src1) + .addReg(Src2); + 
SfHfInstrList.push_back(&MI); + } +} + +void VectorConvertRemove::run() { + for (auto &MBB : MF) { + for (auto &MI : MBB) { + // Skip if the instruction does not have two operands, + // or is a bundle instruction + // or is a debug instruction + if (MI.getNumOperands() != 3 || MI.isDebugInstr()) + continue; + + auto Op1 = MI.getOperand(1); + if (!Op1.isReg()) + continue; + auto Op2 = MI.getOperand(2); + if (!Op2.isReg()) + continue; + auto Op0 = MI.getOperand(0); + if (!Op0.isReg()) + continue; + Register Reg1 = Op1.getReg(); + Register Reg2 = Op2.getReg(); + Register Dest = Op0.getReg(); + + switch (MI.getOpcode()) { + // TODO Handle the new vsub instructions + // qf32 = vadd(sf, sf) + case Hexagon::V6_vadd_sf: + handle_addsub_sf_sf(MI, Reg1, Reg2, Dest, true); + break; + // qf32 = vsub(sf, sf) + case Hexagon::V6_vsub_sf: + handle_addsub_sf_sf(MI, Reg1, Reg2, Dest, false); + break; + // qf32 = vadd(qf32, sf) + case Hexagon::V6_vadd_qf32_mix: + handle_addsub_qf_sf(MI, Reg1, Reg2, Dest, true); + break; + // qf32 = vsub(qf32, sf) + case Hexagon::V6_vsub_qf32_mix: + handle_addsub_qf_sf(MI, Reg1, Reg2, Dest, false); + break; + // qf16 = vadd(hf, hf) + case Hexagon::V6_vadd_hf: + handle_addsubmul_hf_hf(MI, Reg1, Reg2, Dest, Operation::Add16); + break; + // qf16 = vsub(hf, hf) + case Hexagon::V6_vsub_hf: + handle_addsubmul_hf_hf(MI, Reg1, Reg2, Dest, Operation::Sub16); + break; + // qf16 = vadd(qf16, hf) + case Hexagon::V6_vadd_qf16_mix: + handle_addsubmul_qf_hf(MI, Reg1, Reg2, Dest, Operation::Add16); + break; + // qf16 = vsub(qf16, hf) + case Hexagon::V6_vsub_qf16_mix: + handle_addsubmul_qf_hf(MI, Reg1, Reg2, Dest, Operation::Sub16); + break; + // qf32 = vmpy(sf, sf) + case Hexagon::V6_vmpy_qf32_sf: + handle_qf32_mul_sf_sf(MI, Reg1, Reg2, Dest); + break; + // qf32 = vmpy(hf, hf) + case Hexagon::V6_vmpy_qf32_hf: + handle_addsubmul_hf_hf(MI, Reg1, Reg2, Dest, Operation::Mul32); + break; + // qf32 = vmpy(qf16, hf) + case Hexagon::V6_vmpy_qf32_mix_hf: + 
handle_addsubmul_qf_hf(MI, Reg1, Reg2, Dest, Operation::Mul32); + break; + // qf16 = vmpy(hf, hf) + case Hexagon::V6_vmpy_qf16_hf: + handle_addsubmul_hf_hf(MI, Reg1, Reg2, Dest, Operation::Mul16); + break; + // qf16 = vmpy(qf16, hf) + case Hexagon::V6_vmpy_qf16_mix_hf: + handle_addsubmul_qf_hf(MI, Reg1, Reg2, Dest, Operation::Mul16); + ; + break; + default: + break; + } + } + } + + // Delete the vadd/vsub/vmpy instructions + for (MachineInstr *sfhfMI : SfHfInstrList) { + LLVM_DEBUG(dbgs() << "deleting sf/hf instruction "); + LLVM_DEBUG(sfhfMI->dump()); + sfhfMI->eraseFromParent(); + } + // Delete conversion instructions + for (MachineInstr *convMI : ConvInstrList) { + LLVM_DEBUG(dbgs() << "deleting conversion instruction"); + LLVM_DEBUG(convMI->dump()); + convMI->eraseFromParent(); + } +} + +char HexagonXQFloatGenerator::ID = 0; + +} // namespace + +INITIALIZE_PASS(HexagonXQFloatGenerator, "hexagon-xqfloat-generator", + HEXAGON_XQFLOAT_GENERATOR, false, false) + +FunctionPass *llvm::createHexagonXQFloatGenerator() { + return new HexagonXQFloatGenerator(); +} + +// Returns true if qf32 input is from an adder/subtract unit +bool HexagonXQFloatGenerator::checkIfInputFromAdder32(Register Reg) { + MachineInstr *Def = MRI->getVRegDef(Reg); + if (!Def) + return false; + + // If the definition is a copy, we need to analyze its def again + if (Def->getOpcode() == TargetOpcode::COPY) { + Register SrcReg = Def->getOperand(1).getReg(); + if (SrcReg.isValid()) + return checkIfInputFromAdder32(SrcReg); + return false; + } else if (Def->getOpcode() == TargetOpcode::REG_SEQUENCE) { + Register SrcReg1 = Def->getOperand(1).getReg(); + Register SrcReg2 = Def->getOperand(2).getReg(); + bool isTrue = false; + if (SrcReg1.isValid()) + isTrue = checkIfInputFromAdder32(SrcReg1); + if (SrcReg2.isValid()) + isTrue = checkIfInputFromAdder32(SrcReg2); ---------------- fhossein-quic wrote:
Thanks for catching that. Fixed now by using `|=` https://github.com/llvm/llvm-project/pull/198902 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
