[Beignet] [PATCH 6/8] Backend: Implement FDIV64 on BDW.

2015-09-15 Thread junyan . he
From: Junyan He 

According to the document, we use a set of instructions
to implement double type division.

Signed-off-by: Junyan He 
---
 backend/src/backend/gen8_context.cpp | 68 
 backend/src/backend/gen8_context.hpp |  2 ++
 2 files changed, 70 insertions(+)

diff --git a/backend/src/backend/gen8_context.cpp 
b/backend/src/backend/gen8_context.cpp
index b497ee5..f465832 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -924,6 +924,74 @@ namespace gbe
 this->unpackLongVec(src, dst, p->curr.execWidth);
   }
 
+  void Gen8Context::emitF64DIVInstruction(const SelectionInstruction &insn) {
+/* Macro for Double Precision IEEE Compliant fdiv
+
+   Set Rounding Mode in CR to RNE
+   GRF are initialized: r0 = 0, r6 = a, r7 = b, r1 = 1
+   The default data type for the macro is :df
+
+   math.eo.f0.0 (4) r8.acc2 r6.noacc r7.noacc 0xE
+   (-f0.0) if
+   madm (4) r9.acc3 r0.noacc r6.noacc r8.acc2   // Step(1), q0=a*y0
+   madm (4) r10.acc4 r1.noacc -r7.noacc r8.acc2 // Step(2), e0=(1-b*y0)
+   madm (4) r11.acc5 r6.noacc -r7.noacc r9.acc3 // Step(3), r0=a-b*q0
+   madm (4) r12.acc6 r8.acc2 r10.acc4 r8.acc2   // Step(4), y1=y0+e0*y0
+   madm (4) r13.acc7 r1.noacc -r7.noacc r12.acc6// Step(5), e1=(1-b*y1)
+   madm (4) r8.acc8 r8.acc2 r10.acc4 r12.acc6   // Step(6), y2=y0+e0*y1
+   madm (4) r9.acc9 r9.acc3 r11.acc5 r12.acc6   // Step(7), q1=q0+r0*y1
+   madm (4) r12.acc2 r12.acc6 r8.acc8 r13.acc7  // Step(8), y3=y1+e1*y2
+   madm (4) r11.acc3 r6.noacc -r7.noacc r9.acc9 // Step(9), r1=a-b*q1
+
+   Change Rounding Mode in CR if required
+   Implicit Accumulator for destination is NULL
+
+   madm (4) r8.noacc r9.acc9 r11.acc3 r12.acc2  // Step(10), q=q1+r1*y3
+   endif */
+GenRegister r6 = GenRegister::retype(ra->genReg(insn.src(0)), GEN_TYPE_DF);
+GenRegister r7 = GenRegister::retype(ra->genReg(insn.src(1)), GEN_TYPE_DF);
+GenRegister r8 = GenRegister::retype(ra->genReg(insn.dst(0)), GEN_TYPE_DF);
+const GenRegister r0 = GenRegister::retype(ra->genReg(insn.dst(1)), 
GEN_TYPE_DF);
+const GenRegister r1 = GenRegister::retype(ra->genReg(insn.dst(2)), 
GEN_TYPE_DF);
+const GenRegister r9 = GenRegister::retype(ra->genReg(insn.dst(3)), 
GEN_TYPE_DF);
+const GenRegister r10 = GenRegister::retype(ra->genReg(insn.dst(4)), 
GEN_TYPE_DF);
+const GenRegister r11 = GenRegister::retype(ra->genReg(insn.dst(5)), 
GEN_TYPE_DF);
+const GenRegister r12 = GenRegister::retype(ra->genReg(insn.dst(6)), 
GEN_TYPE_DF);
+const GenRegister r13 = GenRegister::retype(ra->genReg(insn.dst(7)), 
GEN_TYPE_DF);
+Gen8Encoder *p8 = reinterpret_cast<Gen8Encoder *>(p);
+p->push(); {
+  p->curr.execWidth = 4;
+  p->curr.predicate = GEN_PREDICATE_NONE;
+  p->curr.noMask= 1;
+  p->MOV(r1, GenRegister::immdf(1.0d));
+  p->MOV(r0, GenRegister::immdf(0.0d));
+
+  for (int i = 0; i < (simdWidth == 16 ? 4 : 2); i++) {
+p->curr.predicate = GEN_PREDICATE_NONE;
+p8->MATH_WITH_ACC(r8, GEN8_MATH_FUNCTION_INVM, r6, r7, GEN8_INSN_ACC2, 
GEN8_INSN_NOACC, GEN8_INSN_NOACC);
+p->curr.useFlag(insn.state.flag, insn.state.subFlag);
+p->curr.predicate = GEN_PREDICATE_NORMAL;
+p->curr.inversePredicate = 1;
+p->curr.noMask= 0;
+p8->MADM(r9, r0, r6, r8, GEN8_INSN_ACC3, GEN8_INSN_NOACC, 
GEN8_INSN_NOACC, GEN8_INSN_ACC2);
+p8->MADM(r10, r1, GenRegister::negate(r7), r8, GEN8_INSN_ACC4, 
GEN8_INSN_NOACC, GEN8_INSN_NOACC, GEN8_INSN_ACC2);
+p8->MADM(r11, r6, GenRegister::negate(r7), r9, GEN8_INSN_ACC5, 
GEN8_INSN_NOACC, GEN8_INSN_NOACC, GEN8_INSN_ACC3);
+p8->MADM(r12, r8, r10, r8, GEN8_INSN_ACC6, GEN8_INSN_ACC2, 
GEN8_INSN_ACC4, GEN8_INSN_ACC2);
+p8->MADM(r13, r1, GenRegister::negate(r7), r12, GEN8_INSN_ACC7, 
GEN8_INSN_NOACC, GEN8_INSN_NOACC, GEN8_INSN_ACC6);
+p8->MADM(r8, r8, r10, r12, GEN8_INSN_ACC8, GEN8_INSN_ACC2, 
GEN8_INSN_ACC4, GEN8_INSN_ACC6);
+p8->MADM(r9, r9, r11, r12, GEN8_INSN_ACC9, GEN8_INSN_ACC3, 
GEN8_INSN_ACC5, GEN8_INSN_ACC6);
+p8->MADM(r12, r12, r8, r13, GEN8_INSN_ACC2, GEN8_INSN_ACC6, 
GEN8_INSN_ACC8, GEN8_INSN_ACC7);
+p8->MADM(r11, r6, GenRegister::negate(r7), r9, GEN8_INSN_ACC3, 
GEN8_INSN_NOACC, GEN8_INSN_NOACC, GEN8_INSN_ACC9);
+
+p8->MADM(r8, r9, r11, r12, GEN8_INSN_NOACC, GEN8_INSN_ACC9, 
GEN8_INSN_ACC3, GEN8_INSN_ACC2);
+
+r6 = GenRegister::offset(r6, 1);
+r7 = GenRegister::offset(r7, 1);
+r8 = GenRegister::offset(r8, 1);
+  }
+} p->pop();
+  }
+
   void Gen8Context::setA0Content(uint16_t new_a0[16], uint16_t max_offset, int 
sz) {
 if (sz == 0)
   sz = 16;
diff --git a/backend/src/backend/gen8_context.hpp 
b/backend/src/backend/gen8_context.hpp
index 84508e9..386f7f3 100644
--- 

Re: [Beignet] [PATCH 6/8] Backend: Implement FDIV64 on BDW.

2015-09-15 Thread He Junyan
On Tue, Sep 15, 2015 at 06:00:57AM -0700, Matt Turner wrote:
> Date: Tue, 15 Sep 2015 06:00:57 -0700
> From: Matt Turner <matts...@gmail.com>
> To: "junyan.he" <junyan...@inbox.com>
> Cc: "beignet@lists.freedesktop.org" <beignet@lists.freedesktop.org>
> Subject: Re: [Beignet] [PATCH 6/8] Backend: Implement FDIV64 on BDW.
> 
> On Tue, Sep 15, 2015 at 4:15 AM,  <junyan...@inbox.com> wrote:
> > From: Junyan He <junyan...@linux.intel.com>
> >
> > According to the document, we use a set of instructions
> > to implement double type division.
> >
> > Signed-off-by: Junyan He <junyan...@linux.intel.com>
> > ---
> >  backend/src/backend/gen8_context.cpp | 68 
> > 
> >  backend/src/backend/gen8_context.hpp |  2 ++
> >  2 files changed, 70 insertions(+)
> >
> > diff --git a/backend/src/backend/gen8_context.cpp 
> > b/backend/src/backend/gen8_context.cpp
> > index b497ee5..f465832 100644
> > --- a/backend/src/backend/gen8_context.cpp
> > +++ b/backend/src/backend/gen8_context.cpp
> > @@ -924,6 +924,74 @@ namespace gbe
> >  this->unpackLongVec(src, dst, p->curr.execWidth);
> >}
> >
> > +  void Gen8Context::emitF64DIVInstruction(const SelectionInstruction 
> > &insn) {
> > +/* Macro for Double Precision IEEE Compliant fdiv
> > +
> > +   Set Rounding Mode in CR to RNE
> > +   GRF are initialized: r0 = 0, r6 = a, r7 = b, r1 = 1
> > +   The default data type for the macro is :df
> > +
> > +   math.eo.f0.0 (4) r8.acc2 r6.noacc r7.noacc 0xE
> > +   (-f0.0) if
> > +   madm (4) r9.acc3 r0.noacc r6.noacc r8.acc2   // Step(1), q0=a*y0
> > +   madm (4) r10.acc4 r1.noacc -r7.noacc r8.acc2 // Step(2), 
> > e0=(1-b*y0)
> > +   madm (4) r11.acc5 r6.noacc -r7.noacc r9.acc3 // Step(3), 
> > r0=a-b*q0
> > +   madm (4) r12.acc6 r8.acc2 r10.acc4 r8.acc2   // Step(4), 
> > y1=y0+e0*y0
> > +   madm (4) r13.acc7 r1.noacc -r7.noacc r12.acc6// Step(5), 
> > e1=(1-b*y1)
> > +   madm (4) r8.acc8 r8.acc2 r10.acc4 r12.acc6   // Step(6), 
> > y2=y0+e0*y1
> > +   madm (4) r9.acc9 r9.acc3 r11.acc5 r12.acc6   // Step(7), 
> > q1=q0+r0*y1
> > +   madm (4) r12.acc2 r12.acc6 r8.acc8 r13.acc7  // Step(8), 
> > y3=y1+e1*y2
> > +   madm (4) r11.acc3 r6.noacc -r7.noacc r9.acc9 // Step(9), 
> > r1=a-b*q1
> > +
> > +   Change Rounding Mode in CR if required
> > +   Implicit Accumulator for destination is NULL
> > +
> > +   madm (4) r8.noacc r9.acc9 r11.acc3 r12.acc2  // Step(10), 
> > q=q1+r1*y3
> > +   endif */
> 
> I don't see an IF or an ENDIF instruction emitted in the code below.
> Is that intentional, or am I misreading the code?
> 
Here, we use f0.1 as the predicate for all the instructions, like:
(-f0.1) madm (4) r9.acc3 r0.noacc r6.noacc r8.acc2
(-f0.1) madm (4) r10.acc4 r1.noacc -r7.noacc r8.acc2
...
I avoid using IF/ENDIF here because we would need to calculate the number of
instructions inside the IF clause, which is not convenient.

> > +GenRegister r6 = GenRegister::retype(ra->genReg(insn.src(0)), 
> > GEN_TYPE_DF);
> > +GenRegister r7 = GenRegister::retype(ra->genReg(insn.src(1)), 
> > GEN_TYPE_DF);
> > +GenRegister r8 = GenRegister::retype(ra->genReg(insn.dst(0)), 
> > GEN_TYPE_DF);
> > +const GenRegister r0 = GenRegister::retype(ra->genReg(insn.dst(1)), 
> > GEN_TYPE_DF);
> > +const GenRegister r1 = GenRegister::retype(ra->genReg(insn.dst(2)), 
> > GEN_TYPE_DF);
> > +const GenRegister r9 = GenRegister::retype(ra->genReg(insn.dst(3)), 
> > GEN_TYPE_DF);
> > +const GenRegister r10 = GenRegister::retype(ra->genReg(insn.dst(4)), 
> > GEN_TYPE_DF);
> > +const GenRegister r11 = GenRegister::retype(ra->genReg(insn.dst(5)), 
> > GEN_TYPE_DF);
> > +const GenRegister r12 = GenRegister::retype(ra->genReg(insn.dst(6)), 
> > GEN_TYPE_DF);
> > +const GenRegister r13 = GenRegister::retype(ra->genReg(insn.dst(7)), 
> > GEN_TYPE_DF);
> > +Gen8Encoder *p8 = reinterpret_cast<Gen8Encoder *>(p);
> > +p->push(); {
> > +  p->curr.execWidth = 4;
> > +  p->curr.predicate = GEN_PREDICATE_NONE;
> > +  p->curr.noMask= 1;
> > +  p->MOV(r1, GenRegister::immdf(1.0d));
> > +  p->MOV(r0, GenRegister::immdf(0.0d));
> > +
> > +  for (int i = 0; i < (simdWidth == 16 ? 4 : 2); i++) {
> > +p->curr.predicate 

Re: [Beignet] [PATCH 6/8] Backend: Implement FDIV64 on BDW.

2015-09-15 Thread Matt Turner
On Tue, Sep 15, 2015 at 4:15 AM,   wrote:
> From: Junyan He 
>
> According to the document, we use a set of instructions
> to implement double type division.
>
> Signed-off-by: Junyan He 
> ---
>  backend/src/backend/gen8_context.cpp | 68 
> 
>  backend/src/backend/gen8_context.hpp |  2 ++
>  2 files changed, 70 insertions(+)
>
> diff --git a/backend/src/backend/gen8_context.cpp 
> b/backend/src/backend/gen8_context.cpp
> index b497ee5..f465832 100644
> --- a/backend/src/backend/gen8_context.cpp
> +++ b/backend/src/backend/gen8_context.cpp
> @@ -924,6 +924,74 @@ namespace gbe
>  this->unpackLongVec(src, dst, p->curr.execWidth);
>}
>
> +  void Gen8Context::emitF64DIVInstruction(const SelectionInstruction &insn) {
> +/* Macro for Double Precision IEEE Compliant fdiv
> +
> +   Set Rounding Mode in CR to RNE
> +   GRF are initialized: r0 = 0, r6 = a, r7 = b, r1 = 1
> +   The default data type for the macro is :df
> +
> +   math.eo.f0.0 (4) r8.acc2 r6.noacc r7.noacc 0xE
> +   (-f0.0) if
> +   madm (4) r9.acc3 r0.noacc r6.noacc r8.acc2   // Step(1), q0=a*y0
> +   madm (4) r10.acc4 r1.noacc -r7.noacc r8.acc2 // Step(2), 
> e0=(1-b*y0)
> +   madm (4) r11.acc5 r6.noacc -r7.noacc r9.acc3 // Step(3), r0=a-b*q0
> +   madm (4) r12.acc6 r8.acc2 r10.acc4 r8.acc2   // Step(4), 
> y1=y0+e0*y0
> +   madm (4) r13.acc7 r1.noacc -r7.noacc r12.acc6// Step(5), 
> e1=(1-b*y1)
> +   madm (4) r8.acc8 r8.acc2 r10.acc4 r12.acc6   // Step(6), 
> y2=y0+e0*y1
> +   madm (4) r9.acc9 r9.acc3 r11.acc5 r12.acc6   // Step(7), 
> q1=q0+r0*y1
> +   madm (4) r12.acc2 r12.acc6 r8.acc8 r13.acc7  // Step(8), 
> y3=y1+e1*y2
> +   madm (4) r11.acc3 r6.noacc -r7.noacc r9.acc9 // Step(9), r1=a-b*q1
> +
> +   Change Rounding Mode in CR if required
> +   Implicit Accumulator for destination is NULL
> +
> +   madm (4) r8.noacc r9.acc9 r11.acc3 r12.acc2  // Step(10), 
> q=q1+r1*y3
> +   endif */

I don't see an IF or an ENDIF instruction emitted in the code below.
Is that intentional, or am I misreading the code?

> +GenRegister r6 = GenRegister::retype(ra->genReg(insn.src(0)), 
> GEN_TYPE_DF);
> +GenRegister r7 = GenRegister::retype(ra->genReg(insn.src(1)), 
> GEN_TYPE_DF);
> +GenRegister r8 = GenRegister::retype(ra->genReg(insn.dst(0)), 
> GEN_TYPE_DF);
> +const GenRegister r0 = GenRegister::retype(ra->genReg(insn.dst(1)), 
> GEN_TYPE_DF);
> +const GenRegister r1 = GenRegister::retype(ra->genReg(insn.dst(2)), 
> GEN_TYPE_DF);
> +const GenRegister r9 = GenRegister::retype(ra->genReg(insn.dst(3)), 
> GEN_TYPE_DF);
> +const GenRegister r10 = GenRegister::retype(ra->genReg(insn.dst(4)), 
> GEN_TYPE_DF);
> +const GenRegister r11 = GenRegister::retype(ra->genReg(insn.dst(5)), 
> GEN_TYPE_DF);
> +const GenRegister r12 = GenRegister::retype(ra->genReg(insn.dst(6)), 
> GEN_TYPE_DF);
> +const GenRegister r13 = GenRegister::retype(ra->genReg(insn.dst(7)), 
> GEN_TYPE_DF);
> +Gen8Encoder *p8 = reinterpret_cast<Gen8Encoder *>(p);
> +p->push(); {
> +  p->curr.execWidth = 4;
> +  p->curr.predicate = GEN_PREDICATE_NONE;
> +  p->curr.noMask= 1;
> +  p->MOV(r1, GenRegister::immdf(1.0d));
> +  p->MOV(r0, GenRegister::immdf(0.0d));
> +
> +  for (int i = 0; i < (simdWidth == 16 ? 4 : 2); i++) {
> +p->curr.predicate = GEN_PREDICATE_NONE;
> +p8->MATH_WITH_ACC(r8, GEN8_MATH_FUNCTION_INVM, r6, r7, 
> GEN8_INSN_ACC2, GEN8_INSN_NOACC, GEN8_INSN_NOACC);
> +p->curr.useFlag(insn.state.flag, insn.state.subFlag);
> +p->curr.predicate = GEN_PREDICATE_NORMAL;
> +p->curr.inversePredicate = 1;
> +p->curr.noMask= 0;
> +p8->MADM(r9, r0, r6, r8, GEN8_INSN_ACC3, GEN8_INSN_NOACC, 
> GEN8_INSN_NOACC, GEN8_INSN_ACC2);
> +p8->MADM(r10, r1, GenRegister::negate(r7), r8, GEN8_INSN_ACC4, 
> GEN8_INSN_NOACC, GEN8_INSN_NOACC, GEN8_INSN_ACC2);
> +p8->MADM(r11, r6, GenRegister::negate(r7), r9, GEN8_INSN_ACC5, 
> GEN8_INSN_NOACC, GEN8_INSN_NOACC, GEN8_INSN_ACC3);
> +p8->MADM(r12, r8, r10, r8, GEN8_INSN_ACC6, GEN8_INSN_ACC2, 
> GEN8_INSN_ACC4, GEN8_INSN_ACC2);
> +p8->MADM(r13, r1, GenRegister::negate(r7), r12, GEN8_INSN_ACC7, 
> GEN8_INSN_NOACC, GEN8_INSN_NOACC, GEN8_INSN_ACC6);
> +p8->MADM(r8, r8, r10, r12, GEN8_INSN_ACC8, GEN8_INSN_ACC2, 
> GEN8_INSN_ACC4, GEN8_INSN_ACC6);
> +p8->MADM(r9, r9, r11, r12, GEN8_INSN_ACC9, GEN8_INSN_ACC3, 
> GEN8_INSN_ACC5, GEN8_INSN_ACC6);
> +p8->MADM(r12, r12, r8, r13, GEN8_INSN_ACC2, GEN8_INSN_ACC6, 
> GEN8_INSN_ACC8, GEN8_INSN_ACC7);
> +p8->MADM(r11, r6, GenRegister::negate(r7), r9, GEN8_INSN_ACC3, 
> GEN8_INSN_NOACC, GEN8_INSN_NOACC, GEN8_INSN_ACC9);
> +
> +p8->MADM(r8, r9, r11, r12, GEN8_INSN_NOACC, GEN8_INSN_ACC9, 
> GEN8_INSN_ACC3, GEN8_INSN_ACC2);
> +
> +