I just tried such kernel, and the generated GEN IR is INDIRECT_MOV, it has nothing to do with this patch.
Thanks Yejun -----Original Message----- From: Yang, Rong R Sent: Tuesday, June 13, 2017 3:54 PM To: Guo, Yejun; Wang, Rander; Pan, Xiuli; beignet@lists.freedesktop.org Subject: RE: [Beignet] [PATCH] do constant folding for kernel struct args Has you consider the value from two arguments case. For example: Struct s1{ int i, float4 f4; } Struct s2{ int i; short s; float4 f4; } __kernel void k(s1, s2, __global float *dst) { int gid = get_global_id(0); float4 *p; if (gid % 2) { p = &s1.f4; } else { P = &s2.f4; } dst[gid] = *p.s1; } > -----Original Message----- > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf > Of Guo, Yejun > Sent: Thursday, June 8, 2017 21:08 > To: Wang, Rander <rander.w...@intel.com>; Pan, Xiuli > <xiuli....@intel.com>; beignet@lists.freedesktop.org > Subject: Re: [Beignet] [PATCH] do constant folding for kernel struct > args > > Yes, the constant folding for kernel struct arg is a must here. > > As for the general constant folding and propagation optimization, I do > not have a position that sel ir or gen ir is better. > > -----Original Message----- > From: Wang, Rander > Sent: Thursday, June 08, 2017 1:14 PM > To: Pan, Xiuli; Guo, Yejun; beignet@lists.freedesktop.org > Cc: Guo, Yejun > Subject: RE: [Beignet] [PATCH] do constant folding for kernel struct > args > > Yes, so I may be able to give some advice > > -----Original Message----- > From: Pan, Xiuli > Sent: Thursday, June 8, 2017 1:09 PM > To: Guo, Yejun <yejun....@intel.com>; beignet@lists.freedesktop.org > Cc: Guo, Yejun <yejun....@intel.com>; Wang, Rander > <rander.w...@intel.com> > Subject: RE: [Beignet] [PATCH] do constant folding for kernel struct > args > > Rander seems to have a similar optimization about imm value at sel ir. > If your case here need the optimization done in GEN IR level then > rander's patch may no longer be needed. > > -----Original Message----- > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf > Of Guo, Yejun > Sent: Thursday, June 8, 2017 12:41 > To: beignet@lists.freedesktop.org > Cc: Guo, Yejun <yejun....@intel.com> > Subject: [Beignet] [PATCH] do constant folding for kernel struct args > > for the following GEN IR, %41 is kernel argument (struct) the first > LOAD will be mov, and the second LOAD will be indirect move (see > lowerFunctionArguments). It hurts performance, and even impacts the > correctness of reg liveness of indriect mov > > LOADI.uint64 %1114 72 > ADD.int64 %78 %41 %1114 > LOAD.int64.private.aligned {%79} %78 bti:255 > LOADI.int64 %1115 8 > ADD.int64 %1116 %78 %1115 > LOAD.int64.private.aligned {%80} %1116 bti:255 > > this function folds the constants of 72 and 8 together, and so it will > be direct mov. > the GEN IR looks like: > LOADI.int64 %1115 80 > ADD.int64 %1116 %41 %1115 > --- > backend/src/CMakeLists.txt | 2 + > backend/src/ir/constopt.cpp | 144 > +++++++++++++++++++++++++++++++++++++++++ > backend/src/ir/constopt.hpp | 54 ++++++++++++++++ > backend/src/ir/context.cpp | 5 ++ > backend/src/ir/instruction.cpp | 7 ++ > backend/src/ir/instruction.hpp | 1 + > 6 files changed, 213 insertions(+) > create mode 100644 backend/src/ir/constopt.cpp create mode 100644 > backend/src/ir/constopt.hpp > > diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt > index c9ff833..74d7bab 100644 > --- a/backend/src/CMakeLists.txt > +++ b/backend/src/CMakeLists.txt > @@ -73,6 +73,8 @@ set (GBE_SRC > ir/value.hpp > ir/lowering.cpp > ir/lowering.hpp > + ir/constopt.cpp > + ir/constopt.hpp > ir/profiling.cpp > ir/profiling.hpp > ir/printf.cpp > diff --git a/backend/src/ir/constopt.cpp b/backend/src/ir/constopt.cpp > new file mode 100644 index 0000000..24878b8 > --- /dev/null > +++ b/backend/src/ir/constopt.cpp > @@ -0,0 +1,144 @@ > +/* > + * Copyright © 2017 Intel Corporation > + * > + * This library is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * This library is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with this library. If not, see > <http://www.gnu.org/licenses/>. > + * > + * Author: Guo Yejun <yejun....@intel.com> */ > + > +#include <assert.h> > +#include "ir/context.hpp" > +#include "ir/value.hpp" > +#include "ir/constopt.hpp" > +#include "sys/set.hpp" > + > +namespace gbe { > +namespace ir { > + > + class FunctionStructArgConstOffsetFolder : public Context { > + public: > + /*! Build the helper structure */ > + FunctionStructArgConstOffsetFolder(Unit &unit) : Context(unit) { > + records.clear(); > + loadImms.clear(); > + } > + /*! Free everything we needed */ > + virtual ~FunctionStructArgConstOffsetFolder() { > + for (size_t i = 0; i < records.size(); ++i) { > + delete records[i]; > + } > + records.clear(); > + loadImms.clear(); > + } > + /*! Perform all function arguments substitution if needed */ > + void folding(const std::string &name); > + > + private: > + class Record { //add dst, arg (kernel struct arg base reg), imm_value > + public: > + Record(Register dst, Register arg, int64_t immv) : > + _dst(dst), _arg(arg), _immv(immv) { } > + Register _dst; > + Register _arg; > + int64_t _immv; > + }; > + std::vector<Record*> records; > + std::map<Register, LoadImmInstruction*> loadImms; //<ir reg, load > + reg imm> > + > + void AddRecord(Register dst, Register arg, int64_t immv) { > + Record* rec = new Record(dst, arg, immv); > + records.push_back(rec); > + } > + }; > + > + void FunctionStructArgConstOffsetFolder::folding(const std::string > + &name) > { > + Function *fn = unit.getFunction(name); > + if (fn == NULL) > + return; > + > + const uint32_t argNum = fn->argNum(); > + for (uint32_t argID = 0; argID < argNum; ++argID) { > + FunctionArgument &arg = fn->getArg(argID); > + if (arg.type != FunctionArgument::STRUCTURE) > + continue; > + AddRecord(arg.reg, arg.reg, 0); > + } > + > + fn->foreachInstruction([&](Instruction &insn) { > + if (insn.getOpcode() == OP_LOADI) { > + LoadImmInstruction *loadImm = cast<LoadImmInstruction>(&insn); > + if(!loadImm) > + return; > + > + //to avoid regression, limit for the case: LOADI.int64 %1164 32 > + //we can loose the limit if necessary > + if (loadImm->getImmediate().getType() != TYPE_S64 && > + loadImm->getImmediate().getType() != TYPE_U64) > + return; > + > + Register dst = insn.getDst(); > + loadImms[dst] = loadImm; > + return; > + } > + > + //we will change imm of loadi directly, so it should not be dst > + for (size_t i = 0; i < insn.getDstNum(); ++i) { > + Register dst = insn.getDst(i); > + assert(loadImms.find(dst) == loadImms.end()); > + } > + > + if (insn.getOpcode() != OP_ADD) > + return; > + > + Register src0 = insn.getSrc(0); > + Register src1 = insn.getSrc(1); > + Register dst = insn.getDst(); > + > + //check if src0 is derived from kernel struct arg > + std::vector<Record*>::iterator it = > + std::find_if(records.begin(), records.end(), [=](Record* rec){ > + return rec->_dst > == src0; > + } ); > + if (it == records.end()) > + return; > + > + //check if src1 is imm value > + if (loadImms.find(src1) == loadImms.end()) > + return; > + > + Record* rec = *it; > + LoadImmInstruction *loadImm = loadImms[src1]; > + Immediate imm = loadImm->getImmediate(); > + int64_t newvalue = imm.getIntegerValue() + rec->_immv; > + > + if (rec->_dst != rec->_arg) { //directly dervied from arg if they are > equal > + //change src0 to be the kernel struct arg > + insn.setSrc(0, rec->_arg); > + > + //change the value of src1 > + ImmediateIndex immIndex = fn->newImmediate(newvalue); > + loadImm->setImmediateIndex(immIndex); > + } > + AddRecord(dst, rec->_arg, newvalue); > + }); > + } > + > + void foldFunctionStructArgConstOffset(Unit &unit, const std::string > &functionName) { > + FunctionStructArgConstOffsetFolder folder(unit); > + folder.folding(functionName); > + } > + > +} /* namespace ir */ > +} > diff --git a/backend/src/ir/constopt.hpp b/backend/src/ir/constopt.hpp > new file mode 100644 index 0000000..f272637 > --- /dev/null > +++ b/backend/src/ir/constopt.hpp > @@ -0,0 +1,54 @@ > +/* > + * Copyright © 2017 Intel Corporation > + * > + * This library is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * This library is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with this library. If not, see > <http://www.gnu.org/licenses/>. > + * > + * Author: Guo Yejun <yejun....@intel.com> */ > + > +#ifndef __GBE_IR_CONSTOPT_HPP__ > +#define __GBE_IR_CONSTOPT_HPP__ > + > +namespace gbe { > +namespace ir { > + > + // Structure to update > + class Unit; > + > + // TODO > + void foldConstant(Unit &unit, const std::string &functionName); > + void propagateConstant(Unit &unit, const std::string &functionName); > + > + // for the following GEN IR, %41 is kernel argument (struct) > + // the first LOAD will be mov, and the second LOAD will be indirect > +move > + // (see lowerFunctionArguments). It hurts performance, > + // and even impacts the correctness of reg liveness of indriect mov > + // > + // LOADI.uint64 %1114 72 > + // ADD.int64 %78 %41 %1114 > + // LOAD.int64.private.aligned {%79} %78 bti:255 > + // LOADI.int64 %1115 8 > + // ADD.int64 %1116 %78 %1115 > + // LOAD.int64.private.aligned {%80} %1116 bti:255 > + // > + // this function folds the constants of 72 and 8 together, > + // and so it will be direct mov. > + // the GEN IR looks like: > + // LOADI.int64 %1115 80 > + // ADD.int64 %1116 %41 %1115 > + void foldFunctionStructArgConstOffset(Unit &unit, const std::string > +&functionName); } /* namespace ir */ } /* namespace gbe */ > + > +#endif /* __GBE_IR_LOWERING_HPP__ */ > diff --git a/backend/src/ir/context.cpp b/backend/src/ir/context.cpp > index e4aac08..f60d33f 100644 > --- a/backend/src/ir/context.cpp > +++ b/backend/src/ir/context.cpp > @@ -24,6 +24,7 @@ > #include "ir/context.hpp" > #include "ir/unit.hpp" > #include "ir/lowering.hpp" > +#include "ir/constopt.hpp" > > namespace gbe { > namespace ir { > @@ -82,6 +83,10 @@ namespace ir { > fn->sortLabels(); > fn->computeCFG(); > > + //TODO: do constant folding and propagation for GEN IR > + //here as the first step, we just do constant folding for kernel struct > args > + foldFunctionStructArgConstOffset(unit, fn->getName()); > + > // Spill function argument to the stack if required and identify which > // function arguments can use constant push > lowerFunctionArguments(unit, fn->getName()); diff --git > a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp > index a9156ff..48590fd 100644 > --- a/backend/src/ir/instruction.cpp > +++ b/backend/src/ir/instruction.cpp > @@ -833,6 +833,9 @@ namespace ir { > INLINE Immediate getImmediate(const Function &fn) const { > return fn.getImmediate(immediateIndex); > } > + INLINE void setImmediateIndex(ImmediateIndex immIndex) { > + immediateIndex = immIndex; > + } > INLINE Type getType(void) const { return this->type; } > bool wellFormed(const Function &fn, std::string &why) const; > INLINE void out(std::ostream &out, const Function &fn) const; > @@ -2445,6 +2448,10 @@ DECL_MEM_FN(MemInstruction, void, > setBtiReg(Register reg), setBtiReg(reg)) > return reinterpret_cast<const > internal::LoadImmInstruction*>(this)- > >getImmediate(fn); > } > > + void LoadImmInstruction::setImmediateIndex(ImmediateIndex immIndex) > { > + > + reinterpret_cast<internal::LoadImmInstruction*>(this)->setImmediateI > + nd > + ex(immIndex); > + } > + > /////////////////////////////////////////////////////////////////////////// > // Implements the emission functions > > ////////////////////////////////////////////////////////////////////// > ///// diff --git a/backend/src/ir/instruction.hpp > b/backend/src/ir/instruction.hpp index 8685dd4..05c3e64 100644 > --- a/backend/src/ir/instruction.hpp > +++ b/backend/src/ir/instruction.hpp > @@ -389,6 +389,7 @@ namespace ir { > public: > /*! Return the value stored in the instruction */ > Immediate getImmediate(void) const; > + void setImmediateIndex(ImmediateIndex immIndex); > /*! Return the type of the stored value */ > Type getType(void) const; > /*! Return true if the given instruction is an instance of this > class */ > -- > 2.7.4 > > _______________________________________________ > Beignet mailing list > Beignet@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/beignet > _______________________________________________ > Beignet mailing list > Beignet@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet