This patch failed to apply because I had already pushed the other sizes first. Could you please pull and merge?
On Tue, Nov 12, 2013 at 1:47 PM, Min Chen <[email protected]> wrote: > # HG changeset patch > # User Min Chen <[email protected]> > # Date 1384244066 -28800 > # Node ID 9dda3a715f9fe089ee7b1e4db2ffeff28cd477c3 > # Parent 8c731f8c71ff6f42718a80934433a154417caeec > asm: assembly code for x265_pixel_avg_12x16 > > diff -r 8c731f8c71ff -r 9dda3a715f9f source/common/x86/asm-primitives.cpp > --- a/source/common/x86/asm-primitives.cpp Tue Nov 12 16:14:09 2013 > +0800 > +++ b/source/common/x86/asm-primitives.cpp Tue Nov 12 16:14:26 2013 > +0800 > @@ -125,6 +125,7 @@ > p.pixelavg_pp[LUMA_16x12] = x265_pixel_avg_16x12_ ## cpu; \ > p.pixelavg_pp[LUMA_16x8] = x265_pixel_avg_16x8_ ## cpu; \ > p.pixelavg_pp[LUMA_16x4] = x265_pixel_avg_16x4_ ## cpu; \ > + p.pixelavg_pp[LUMA_12x16] = x265_pixel_avg_12x16_ ## cpu; \ > p.pixelavg_pp[LUMA_8x16] = x265_pixel_avg_8x16_ ## cpu; \ > p.pixelavg_pp[LUMA_8x8] = x265_pixel_avg_8x8_ ## cpu; \ > p.pixelavg_pp[LUMA_8x4] = x265_pixel_avg_8x4_ ## cpu; > diff -r 8c731f8c71ff -r 9dda3a715f9f source/common/x86/mc-a.asm > --- a/source/common/x86/mc-a.asm Tue Nov 12 16:14:09 2013 +0800 > +++ b/source/common/x86/mc-a.asm Tue Nov 12 16:14:26 2013 +0800 > @@ -190,7 +190,13 @@ > SWAP 0, 6 > BIWEIGHT [%2+mmsize/2], [%3+mmsize/2] > packuswb m6, m0 > - mova [%1], m6 > +%if %4 != 12 > + mova [%1], m6 > +%else ; !w12 > + movh [%1], m6 > + movhlps m6, m6 > + movd [%1+mmsize/2], m6 > +%endif ; w12 > %endif > %endmacro > > @@ -222,8 +228,12 @@ > %else > %assign x 0 > %rep (%1*SIZEOF_PIXEL+mmsize-1)/mmsize > - BIWEIGHT_ROW t0+x, t2+x, t4+x, > %1 > - BIWEIGHT_ROW t0+x+SIZEOF_PIXEL*t1, t2+x+SIZEOF_PIXEL*t3, > t4+x+SIZEOF_PIXEL*t5, %1 > +%assign y mmsize > +%if (%1 == 12) && (%1*SIZEOF_PIXEL-x < mmsize) > +%assign y (%1*SIZEOF_PIXEL-x) > +%endif > + BIWEIGHT_ROW t0+x, t2+x, t4+x, > y > + BIWEIGHT_ROW t0+x+SIZEOF_PIXEL*t1, t2+x+SIZEOF_PIXEL*t3, > t4+x+SIZEOF_PIXEL*t5, y > %assign x x+mmsize > %endrep > %endif > @@ -235,6 +245,7 @@ > INIT_MMX mmx2 > AVG_WEIGHT 4 > 
AVG_WEIGHT 8 > +AVG_WEIGHT 12 > AVG_WEIGHT 16 > AVG_WEIGHT 32 > %if HIGH_BIT_DEPTH > @@ -245,6 +256,7 @@ > %else ;!HIGH_BIT_DEPTH > INIT_XMM sse2 > AVG_WEIGHT 8, 7 > +AVG_WEIGHT 12, 7 > AVG_WEIGHT 16, 7 > AVG_WEIGHT 32, 7 > %define BIWEIGHT BIWEIGHT_SSSE3 > @@ -253,6 +265,7 @@ > AVG_WEIGHT 4 > INIT_XMM ssse3 > AVG_WEIGHT 8, 7 > +AVG_WEIGHT 12, 7 > AVG_WEIGHT 16, 7 > AVG_WEIGHT 32, 7 > > @@ -648,7 +661,7 @@ > ; pixel *src2, intptr_t src2_stride, int height, int > weight ); > > > ;----------------------------------------------------------------------------- > > -%macro AVG_FUNC 3 > +%macro AVG_FUNC 3-4 > cglobal pixel_avg_w%1 > AVG_START > .height_loop: > @@ -663,8 +676,13 @@ > pavgb m0, [t4+x] > pavgb m1, [t4+x+SIZEOF_PIXEL*t5] > %endif > +%if (%1 == 12) && (%1-x/SIZEOF_PIXEL < mmsize) > + %4 [t0+x], m0 > + %4 [t0+x+SIZEOF_PIXEL*t1], m1 > +%else > %3 [t0+x], m0 > %3 [t0+x+SIZEOF_PIXEL*t1], m1 > +%endif > %assign x x+mmsize > %endrep > AVG_END > @@ -718,6 +736,9 @@ > AVGH 8, 8 > AVGH 8, 4 > > +AVG_FUNC 12, movq, movq, movd > +AVGH 12, 16 > + > AVG_FUNC 16, movq, movq > AVGH 16, 64 > AVGH 16, 32 > @@ -739,6 +760,8 @@ > AVGH 16, 12 > AVGH 16, 8 > AVGH 16, 4 > +AVG_FUNC 12, movdqu, movdqa, movq > +AVGH 12, 16 > AVGH 8, 16 > AVGH 8, 8 > AVGH 8, 4 > @@ -750,6 +773,7 @@ > AVGH 16, 12 > AVGH 16, 8 > AVGH 16, 4 > +AVGH 12, 16 > AVGH 8, 16 > AVGH 8, 8 > AVGH 8, 4 > diff -r 8c731f8c71ff -r 9dda3a715f9f source/common/x86/pixel.h > --- a/source/common/x86/pixel.h Tue Nov 12 16:14:09 2013 +0800 > +++ b/source/common/x86/pixel.h Tue Nov 12 16:14:26 2013 +0800 > @@ -245,6 +245,7 @@ > DECL_SUF(x265_pixel_avg_16x12, (pixel *, intptr_t, pixel *, intptr_t, > pixel *, intptr_t, int)) > DECL_SUF(x265_pixel_avg_16x8, (pixel *, intptr_t, pixel *, intptr_t, > pixel *, intptr_t, int)) > DECL_SUF(x265_pixel_avg_16x4, (pixel *, intptr_t, pixel *, intptr_t, > pixel *, intptr_t, int)) > +DECL_SUF(x265_pixel_avg_12x16, (pixel *, intptr_t, pixel *, intptr_t, > pixel *, intptr_t, int)) > 
DECL_SUF(x265_pixel_avg_8x16, (pixel *, intptr_t, pixel *, intptr_t, > pixel *, intptr_t, int)) > DECL_SUF(x265_pixel_avg_8x8, (pixel *, intptr_t, pixel *, intptr_t, > pixel *, intptr_t, int)) > DECL_SUF(x265_pixel_avg_8x4, (pixel *, intptr_t, pixel *, intptr_t, > pixel *, intptr_t, int)) > > _______________________________________________ > x265-devel mailing list > [email protected] > https://mailman.videolan.org/listinfo/x265-devel >
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
