On Thu, Jun 25, 2015 at 2:19 PM, <[email protected]> wrote:
> # HG changeset patch > # User Rajesh Paulraj<[email protected]> > # Date 1435219198 -19800 > # Thu Jun 25 13:29:58 2015 +0530 > # Node ID a03487d6295cf89b065eff36e5c1ec4ee4253243 > # Parent b1af4c36f48a4500a4912373ebcda9a5540b5c15 > asm: sse4 10bit code for sign primitive > > calSign 6.16x 356.91 2197.63 > > diff -r b1af4c36f48a -r a03487d6295c source/common/x86/asm-primitives.cpp > --- a/source/common/x86/asm-primitives.cpp Wed Jun 24 10:36:15 2015 > -0500 > +++ b/source/common/x86/asm-primitives.cpp Thu Jun 25 13:29:58 2015 > +0530 > @@ -1097,6 +1097,7 @@ > p.saoCuOrgE3[0] = PFX(saoCuOrgE3_sse4); > p.saoCuOrgE3[1] = PFX(saoCuOrgE3_sse4); > p.saoCuOrgB0 = PFX(saoCuOrgB0_sse4); > + p.sign = x265_calculateSign_sse4; > > This should be PFX(). > LUMA_ADDAVG(sse4); > CHROMA_420_ADDAVG(sse4); > diff -r b1af4c36f48a -r a03487d6295c source/common/x86/loopfilter.asm > --- a/source/common/x86/loopfilter.asm Wed Jun 24 10:36:15 2015 -0500 > +++ b/source/common/x86/loopfilter.asm Thu Jun 25 13:29:58 2015 +0530 > @@ -40,6 +40,7 @@ > cextern pw_2 > cextern pw_1023 > cextern pb_movemask > +cextern pw_1 > > > > > ;============================================================================================================ > @@ -1419,3 +1420,49 @@ > > .end: > RET > + > > +;----------------------------------------------------------------------------- > +; void calSign(int8_t *dst, const pixel *src1, const pixel *src2, const > int endX) > > +;----------------------------------------------------------------------------- > +%if HIGH_BIT_DEPTH > +INIT_XMM sse4 > +cglobal calculateSign, 4, 7, 5 > + mova m0, [pw_1] > + mov r4d, r3d > + shr r3d, 4 > + add r3d, 1 > + mov r5, r0 > + movu m4, [r0 + r4] > +.loop > + movu m1, [r1] ; m2 = pRec[x] > + movu m2, [r2] ; m3 = pTmpU[x] > + > + pcmpgtw m3, m1, m2 > + pcmpgtw m2, m1 > + pand m3, m0 > + por m3, m2 > + packsswb m3, m3 > + movh [r0], xm3 > + > + movu m1, [r1 + 16] ; m2 = pRec[x] > + movu m2, [r2 + 16] ; m3 = pTmpU[x] > + > + pcmpgtw m3, 
m1, m2 > + pcmpgtw m2, m1 > + pand m3, m0 > + por m3, m2 > + packsswb m3, m3 > + movh [r0 + 8], xm3 > + > + add r0, 16 > + add r1, 32 > + add r2, 32 > + dec r3d > + jnz .loop > + > + mov r6, r0 > + sub r6, r5 > + sub r4, r6 > + movu [r0 + r4], m4 > + RET > +%endif > diff -r b1af4c36f48a -r a03487d6295c source/common/x86/loopfilter.h > --- a/source/common/x86/loopfilter.h Wed Jun 24 10:36:15 2015 -0500 > +++ b/source/common/x86/loopfilter.h Thu Jun 25 13:29:58 2015 +0530 > @@ -37,7 +37,8 @@ > void PFX(saoCuOrgB0_ ## cpu)(pixel* rec, const int8_t* offsetBo, int > ctuWidth, int ctuHeight, intptr_t stride); \ > void PFX(saoCuStatsE2_ ## cpu)(const pixel *fenc, const pixel *rec, > intptr_t stride, int8_t *upBuff1, int8_t *upBufft, int endX, int endY, > int32_t *stats, int32_t *count); \ > void PFX(saoCuStatsE3_ ## cpu)(const pixel *fenc, const pixel *rec, > intptr_t stride, int8_t *upBuff1, int endX, int endY, int32_t *stats, > int32_t *count); \ > - void PFX(calSign_ ## cpu)(int8_t *dst, const pixel *src1, const pixel > *src2, const int endX); > + void PFX(calSign_ ## cpu)(int8_t *dst, const pixel *src1, const pixel > *src2, const int endX); \ > + void PFX(calculateSign_ ## cpu)(int8_t *dst, const pixel *src1, const > pixel *src2, const int endX); > > What's the difference between calculateSign_ and calSign_? They have the same function signature and are assigned to the same primitive? > DECL_SAO(sse4); > DECL_SAO(avx2); > _______________________________________________ > x265-devel mailing list > [email protected] > https://mailman.videolan.org/listinfo/x265-devel >
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
