On Tue, Nov 12, 2013 at 5:27 AM, Min Chen <[email protected]> wrote:

> # HG changeset patch
> # User Min Chen <[email protected]>
> # Date 1384255626 -28800
> # Node ID d0fb42b3fa396e413dc510bd9cf7eb2a8da22f31
> # Parent 0d8ed55eb94d7cf43afb687edbad7f3db349b84c
> asm: assembly code for x265_pixel_avg_12x16
>
oh; taking this one instead of the one I fixed up > > diff -r 0d8ed55eb94d -r d0fb42b3fa39 source/common/x86/asm-primitives.cpp > --- a/source/common/x86/asm-primitives.cpp Tue Nov 12 19:26:48 2013 > +0800 > +++ b/source/common/x86/asm-primitives.cpp Tue Nov 12 19:27:06 2013 > +0800 > @@ -134,6 +134,7 @@ > p.pixelavg_pp[LUMA_16x12] = x265_pixel_avg_16x12_ ## cpu; \ > p.pixelavg_pp[LUMA_16x8] = x265_pixel_avg_16x8_ ## cpu; \ > p.pixelavg_pp[LUMA_16x4] = x265_pixel_avg_16x4_ ## cpu; \ > + p.pixelavg_pp[LUMA_12x16] = x265_pixel_avg_12x16_ ## cpu; \ > p.pixelavg_pp[LUMA_8x32] = x265_pixel_avg_8x32_ ## cpu; \ > p.pixelavg_pp[LUMA_8x16] = x265_pixel_avg_8x16_ ## cpu; \ > p.pixelavg_pp[LUMA_8x8] = x265_pixel_avg_8x8_ ## cpu; \ > diff -r 0d8ed55eb94d -r d0fb42b3fa39 source/common/x86/mc-a.asm > --- a/source/common/x86/mc-a.asm Tue Nov 12 19:26:48 2013 +0800 > +++ b/source/common/x86/mc-a.asm Tue Nov 12 19:27:06 2013 +0800 > @@ -190,7 +190,13 @@ > SWAP 0, 6 > BIWEIGHT [%2+mmsize/2], [%3+mmsize/2] > packuswb m6, m0 > - mova [%1], m6 > +%if %4 != 12 > + mova [%1], m6 > +%else ; !w12 > + movh [%1], m6 > + movhlps m6, m6 > + movd [%1+mmsize/2], m6 > +%endif ; w12 > %endif > %endmacro > > @@ -222,8 +228,12 @@ > %else > %assign x 0 > %rep (%1*SIZEOF_PIXEL+mmsize-1)/mmsize > - BIWEIGHT_ROW t0+x, t2+x, t4+x, > %1 > - BIWEIGHT_ROW t0+x+SIZEOF_PIXEL*t1, t2+x+SIZEOF_PIXEL*t3, > t4+x+SIZEOF_PIXEL*t5, %1 > +%assign y mmsize > +%if (%1 == 12) && (%1*SIZEOF_PIXEL-x < mmsize) > +%assign y (%1*SIZEOF_PIXEL-x) > +%endif > + BIWEIGHT_ROW t0+x, t2+x, t4+x, > y > + BIWEIGHT_ROW t0+x+SIZEOF_PIXEL*t1, t2+x+SIZEOF_PIXEL*t3, > t4+x+SIZEOF_PIXEL*t5, y > %assign x x+mmsize > %endrep > %endif > @@ -235,6 +245,7 @@ > INIT_MMX mmx2 > AVG_WEIGHT 4 > AVG_WEIGHT 8 > +AVG_WEIGHT 12 > AVG_WEIGHT 16 > AVG_WEIGHT 32 > AVG_WEIGHT 64 > @@ -248,6 +259,7 @@ > %else ;!HIGH_BIT_DEPTH > INIT_XMM sse2 > AVG_WEIGHT 8, 7 > +AVG_WEIGHT 12, 7 > AVG_WEIGHT 16, 7 > AVG_WEIGHT 32, 7 > AVG_WEIGHT 64, 7 > @@ -259,6 +271,7 @@ > 
AVG_WEIGHT 4 > INIT_XMM ssse3 > AVG_WEIGHT 8, 7 > +AVG_WEIGHT 12, 7 > AVG_WEIGHT 16, 7 > AVG_WEIGHT 32, 7 > AVG_WEIGHT 64, 7 > @@ -657,7 +670,7 @@ > ; pixel *src2, intptr_t src2_stride, int height, int > weight ); > > > ;----------------------------------------------------------------------------- > > -%macro AVG_FUNC 3 > +%macro AVG_FUNC 3-4 > cglobal pixel_avg_w%1 > AVG_START > .height_loop: > @@ -672,8 +685,13 @@ > pavgb m0, [t4+x] > pavgb m1, [t4+x+SIZEOF_PIXEL*t5] > %endif > +%if (%1 == 12) && (%1-x/SIZEOF_PIXEL < mmsize) > + %4 [t0+x], m0 > + %4 [t0+x+SIZEOF_PIXEL*t1], m1 > +%else > %3 [t0+x], m0 > %3 [t0+x+SIZEOF_PIXEL*t1], m1 > +%endif > %assign x x+mmsize > %endrep > AVG_END > @@ -728,6 +746,9 @@ > AVGH 8, 8 > AVGH 8, 4 > > +AVG_FUNC 12, movq, movq, movd > +AVGH 12, 16 > + > AVG_FUNC 16, movq, movq > AVGH 16, 64 > AVGH 16, 32 > @@ -780,6 +801,9 @@ > AVG_FUNC 48, movdqu, movdqa > AVGH 48, 64 > > +AVG_FUNC 12, movdqu, movdqa, movq > +AVGH 12, 16 > + > AVGH 8, 32 > AVGH 8, 16 > AVGH 8, 8 > @@ -806,6 +830,8 @@ > > AVGH 48, 64 > > +AVGH 12, 16 > + > AVGH 8, 32 > AVGH 8, 16 > AVGH 8, 8 > diff -r 0d8ed55eb94d -r d0fb42b3fa39 source/common/x86/pixel.h > --- a/source/common/x86/pixel.h Tue Nov 12 19:26:48 2013 +0800 > +++ b/source/common/x86/pixel.h Tue Nov 12 19:27:06 2013 +0800 > @@ -254,6 +254,7 @@ > DECL_SUF(x265_pixel_avg_16x12, (pixel *, intptr_t, pixel *, intptr_t, > pixel *, intptr_t, int)) > DECL_SUF(x265_pixel_avg_16x8, (pixel *, intptr_t, pixel *, intptr_t, > pixel *, intptr_t, int)) > DECL_SUF(x265_pixel_avg_16x4, (pixel *, intptr_t, pixel *, intptr_t, > pixel *, intptr_t, int)) > +DECL_SUF(x265_pixel_avg_12x16, (pixel *, intptr_t, pixel *, intptr_t, > pixel *, intptr_t, int)) > DECL_SUF(x265_pixel_avg_8x32, (pixel *, intptr_t, pixel *, intptr_t, > pixel *, intptr_t, int)) > DECL_SUF(x265_pixel_avg_8x16, (pixel *, intptr_t, pixel *, intptr_t, > pixel *, intptr_t, int)) > DECL_SUF(x265_pixel_avg_8x8, (pixel *, intptr_t, pixel *, intptr_t, > pixel *, 
intptr_t, int))
>
> _______________________________________________
> x265-devel mailing list
> [email protected]
> https://mailman.videolan.org/listinfo/x265-devel
>

--
Steve Borho
_______________________________________________
x265-devel mailing list
[email protected]
https://mailman.videolan.org/listinfo/x265-devel
