On Tue, Nov 12, 2013 at 7:41 AM, <[email protected]> wrote:
> # HG changeset patch > # User Murugan Vairavel <[email protected]> > # Date 1384263623 -19800 > # Tue Nov 12 19:10:23 2013 +0530 > # Node ID b1e0fe97bbfa7bf367d7318f057690c64f1f1f19 > # Parent 7a8118d07276312b2971b292d689805074abd28a > asm: Unit test code for pixelsub_ps function > You need to address Min's comments on the asm patch. > > diff -r 7a8118d07276 -r b1e0fe97bbfa source/common/pixel.cpp > --- a/source/common/pixel.cpp Tue Nov 12 17:06:34 2013 +0530 > +++ b/source/common/pixel.cpp Tue Nov 12 19:10:23 2013 +0530 > @@ -778,6 +778,22 @@ > b += strideb; > } > } > + > +template<int bx, int by> > +void pixel_sub_ps_c(int16_t *a, intptr_t dstride, pixel *b0, pixel *b1, > intptr_t sstride0, intptr_t sstride1) > +{ > + for (int y = 0; y < by; y++) > + { > + for (int x = 0; x < bx; x++) > + { > + a[x] = (int16_t)(b0[x] - b1[x]); > + } > + > + b0 += sstride0; > + b1 += sstride1; > + a += dstride; > + } > +} > } // end anonymous namespace > > namespace x265 { > @@ -821,12 +837,14 @@ > #define CHROMA(W, H) \ > p.chroma_copy_pp[CHROMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \ > p.chroma_copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \ > - p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; > + p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>;\ > + p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; > > #define LUMA(W, H) \ > p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \ > p.luma_copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \ > - p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; > + p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>;\ > + p.luma_sub_ps[LUMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; > > LUMA(4, 4); > LUMA(8, 8); > diff -r 7a8118d07276 -r b1e0fe97bbfa source/common/primitives.h > --- a/source/common/primitives.h Tue Nov 12 17:06:34 2013 +0530 > +++ b/source/common/primitives.h Tue Nov 12 19:10:23 2013 +0530 > @@ -207,6 +207,8 @@ > typedef void 
(*copy_sp_t)(pixel *dst, intptr_t dstStride, int16_t *src, > intptr_t srcStride); > typedef void (*copy_ps_t)(int16_t *dst, intptr_t dstStride, pixel *src, > intptr_t srcStride); > > +typedef void (*pixel_sub_ps_t)(int16_t *dst, intptr_t dstride, pixel > *src0, pixel *src1, intptr_t sstride0, intptr_t sstride1); > + > /* Define a structure containing function pointers to optimized encoder > * primitives. Each pointer can reference either an assembly routine, > * a vectorized primitive, or a C function. */ > @@ -237,6 +239,9 @@ > copy_ps_t luma_copy_ps[NUM_LUMA_PARTITIONS]; > copy_ps_t chroma_copy_ps[NUM_CHROMA_PARTITIONS]; > > + pixel_sub_ps_t luma_sub_ps[NUM_LUMA_PARTITIONS]; > + pixel_sub_ps_t chroma_sub_ps[NUM_CHROMA_PARTITIONS]; > + > ipfilter_ps_t ipfilter_ps[NUM_IPFILTER_P_S]; > ipfilter_sp_t ipfilter_sp[NUM_IPFILTER_S_P]; > ipfilter_ss_t ipfilter_ss[NUM_IPFILTER_S_S]; > diff -r 7a8118d07276 -r b1e0fe97bbfa source/common/x86/asm-primitives.cpp > --- a/source/common/x86/asm-primitives.cpp Tue Nov 12 17:06:34 2013 > +0530 > +++ b/source/common/x86/asm-primitives.cpp Tue Nov 12 19:10:23 2013 > +0530 > @@ -133,7 +133,8 @@ > > #define SETUP_CHROMA_FUNC_DEF(W, H, cpu) \ > p.chroma_hpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## > W ## x ## H ## cpu; \ > - p.chroma_vpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W > ## x ## H ## cpu; > + p.chroma_vpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W > ## x ## H ## cpu;\ > + p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## > x ## H ## cpu; > > #define SETUP_CHROMA_BLOCKCOPY_FUNC_DEF(W, H, cpu) \ > p.chroma_copy_pp[CHROMA_ ## W ## x ## H] = x265_blockcopy_pp_ ## W ## > x ## H ## cpu; > @@ -194,7 +195,8 @@ > p.luma_hpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_pp_ ## W ## > x ## H ## cpu; \ > p.luma_hps[LUMA_ ## W ## x ## H] = x265_interp_8tap_horiz_ps_ ## W ## > x ## H ## cpu; \ > p.luma_vpp[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_pp_ ## W ## > x ## H ## 
cpu; \ > - p.luma_vps[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_ps_ ## W ## > x ## H ## cpu; > + p.luma_vps[LUMA_ ## W ## x ## H] = x265_interp_8tap_vert_ps_ ## W ## > x ## H ## cpu;\ > + p.luma_sub_ps[LUMA_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## > H ## cpu; > > #define SETUP_LUMA_BLOCKCOPY_FUNC_DEF(W, H, cpu) \ > p.luma_copy_pp[LUMA_ ## W ## x ## H] = x265_blockcopy_pp_ ## W ## x > ## H ## cpu; > diff -r 7a8118d07276 -r b1e0fe97bbfa source/common/x86/pixel.h > --- a/source/common/x86/pixel.h Tue Nov 12 17:06:34 2013 +0530 > +++ b/source/common/x86/pixel.h Tue Nov 12 19:10:23 2013 +0530 > @@ -266,11 +266,77 @@ > DECL_ADS(2, avx2) > DECL_ADS(1, avx2) > > +#define SETUP_CHROMA_PIXELSUB_PS_FUNC(W, H, cpu) \ > + void x265_pixel_sub_ps_ ## W ## x ## H ## cpu(int16_t *dest, intptr_t > destride, pixel *src0, pixel *src1, intptr_t srcstride0, intptr_t > srcstride1); > + > +#define CHROMA_PIXELSUB_DEF(cpu) \ > + SETUP_CHROMA_PIXELSUB_PS_FUNC(4, 4, cpu); \ > + SETUP_CHROMA_PIXELSUB_PS_FUNC(4, 2, cpu); \ > + SETUP_CHROMA_PIXELSUB_PS_FUNC(2, 4, cpu); \ > + SETUP_CHROMA_PIXELSUB_PS_FUNC(8, 8, cpu); \ > + SETUP_CHROMA_PIXELSUB_PS_FUNC(8, 4, cpu); \ > + SETUP_CHROMA_PIXELSUB_PS_FUNC(4, 8, cpu); \ > + SETUP_CHROMA_PIXELSUB_PS_FUNC(8, 6, cpu); \ > + SETUP_CHROMA_PIXELSUB_PS_FUNC(6, 8, cpu); \ > + SETUP_CHROMA_PIXELSUB_PS_FUNC(8, 2, cpu); \ > + SETUP_CHROMA_PIXELSUB_PS_FUNC(2, 8, cpu); \ > + SETUP_CHROMA_PIXELSUB_PS_FUNC(16, 16, cpu); \ > + SETUP_CHROMA_PIXELSUB_PS_FUNC(16, 8, cpu); \ > + SETUP_CHROMA_PIXELSUB_PS_FUNC(8, 16, cpu); \ > + SETUP_CHROMA_PIXELSUB_PS_FUNC(16, 12, cpu); \ > + SETUP_CHROMA_PIXELSUB_PS_FUNC(12, 16, cpu); \ > + SETUP_CHROMA_PIXELSUB_PS_FUNC(16, 4, cpu); \ > + SETUP_CHROMA_PIXELSUB_PS_FUNC(4, 16, cpu); \ > + SETUP_CHROMA_PIXELSUB_PS_FUNC(32, 32, cpu); \ > + SETUP_CHROMA_PIXELSUB_PS_FUNC(32, 16, cpu); \ > + SETUP_CHROMA_PIXELSUB_PS_FUNC(16, 32, cpu); \ > + SETUP_CHROMA_PIXELSUB_PS_FUNC(32, 24, cpu); \ > + SETUP_CHROMA_PIXELSUB_PS_FUNC(24, 32, 
cpu); \ > + SETUP_CHROMA_PIXELSUB_PS_FUNC(32, 8, cpu); \ > + SETUP_CHROMA_PIXELSUB_PS_FUNC(8, 32, cpu); > + > +#define SETUP_LUMA_PIXELSUB_PS_FUNC(W, H, cpu) \ > + void x265_pixel_sub_ps_ ## W ## x ## H ## cpu(int16_t *dest, intptr_t > destride, pixel *src0, pixel *src1, intptr_t srcstride0, intptr_t > srcstride1); > + > +#define LUMA_PIXELSUB_DEF(cpu) \ > + SETUP_LUMA_PIXELSUB_PS_FUNC(4, 4, cpu); \ > + SETUP_LUMA_PIXELSUB_PS_FUNC(8, 8, cpu); \ > + SETUP_LUMA_PIXELSUB_PS_FUNC(8, 4, cpu); \ > + SETUP_LUMA_PIXELSUB_PS_FUNC(4, 8, cpu); \ > + SETUP_LUMA_PIXELSUB_PS_FUNC(16, 16, cpu); \ > + SETUP_LUMA_PIXELSUB_PS_FUNC(16, 8, cpu); \ > + SETUP_LUMA_PIXELSUB_PS_FUNC(8, 16, cpu); \ > + SETUP_LUMA_PIXELSUB_PS_FUNC(16, 12, cpu); \ > + SETUP_LUMA_PIXELSUB_PS_FUNC(12, 16, cpu); \ > + SETUP_LUMA_PIXELSUB_PS_FUNC(16, 4, cpu); \ > + SETUP_LUMA_PIXELSUB_PS_FUNC(4, 16, cpu); \ > + SETUP_LUMA_PIXELSUB_PS_FUNC(32, 32, cpu); \ > + SETUP_LUMA_PIXELSUB_PS_FUNC(32, 16, cpu); \ > + SETUP_LUMA_PIXELSUB_PS_FUNC(16, 32, cpu); \ > + SETUP_LUMA_PIXELSUB_PS_FUNC(32, 24, cpu); \ > + SETUP_LUMA_PIXELSUB_PS_FUNC(24, 32, cpu); \ > + SETUP_LUMA_PIXELSUB_PS_FUNC(32, 8, cpu); \ > + SETUP_LUMA_PIXELSUB_PS_FUNC(8, 32, cpu); \ > + SETUP_LUMA_PIXELSUB_PS_FUNC(64, 64, cpu); \ > + SETUP_LUMA_PIXELSUB_PS_FUNC(64, 32, cpu); \ > + SETUP_LUMA_PIXELSUB_PS_FUNC(32, 64, cpu); \ > + SETUP_LUMA_PIXELSUB_PS_FUNC(64, 48, cpu); \ > + SETUP_LUMA_PIXELSUB_PS_FUNC(48, 64, cpu); \ > + SETUP_LUMA_PIXELSUB_PS_FUNC(64, 16, cpu); \ > + SETUP_LUMA_PIXELSUB_PS_FUNC(16, 64, cpu); > + > +CHROMA_PIXELSUB_DEF(_sse4); > +LUMA_PIXELSUB_DEF(_sse4); > + > #undef DECL_PIXELS > #undef DECL_SUF > #undef DECL_HEVC_SSD > #undef DECL_X1 > #undef DECL_X4 > #undef DECL_ADS > +#undef SETUP_CHROMA_PIXELSUB_PS_FUNC > +#undef SETUP_LUMA_PIXELSUB_PS_FUNC > +#undef CHROMA_PIXELSUB_DEF > +#undef LUMA_PIXELSUB_DEF > > #endif // ifndef X265_I386_PIXEL_H > diff -r 7a8118d07276 -r b1e0fe97bbfa source/test/pixelharness.cpp > --- 
a/source/test/pixelharness.cpp Tue Nov 12 17:06:34 2013 +0530 > +++ b/source/test/pixelharness.cpp Tue Nov 12 19:10:23 2013 +0530 > @@ -586,6 +586,29 @@ > return true; > } > > +bool PixelHarness::check_pixel_sub_ps(pixel_sub_ps_t ref, pixel_sub_ps_t > opt) > +{ > + ALIGN_VAR_16(int16_t, ref_dest[64 * 64]); > + ALIGN_VAR_16(int16_t, opt_dest[64 * 64]); > + > + memset(ref_dest, 0xCD, sizeof(ref_dest)); > + memset(opt_dest, 0xCD, sizeof(opt_dest)); > + > + int j = 0; > + for (int i = 0; i < 1; i++) > + { > + opt(opt_dest, 64, pbuf2 + j, pbuf1 + j, STRIDE, STRIDE); > + ref(ref_dest, 64, pbuf2 + j, pbuf1 + j, STRIDE, STRIDE); > + > + if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t))) > + return false; > + > + j += INCR; > + } > + > + return true; > +} > + > bool PixelHarness::testPartition(int part, const EncoderPrimitives& ref, > const EncoderPrimitives& opt) > { > if (opt.satd[part]) > @@ -722,6 +745,24 @@ > return false; > } > } > + > + if (opt.luma_sub_ps[part]) > + { > + if (!check_pixel_sub_ps(ref.luma_sub_ps[part], > opt.luma_sub_ps[part])) > + { > + printf("luma_sub_ps[%s] failed\n", lumaPartStr[part]); > + return false; > + } > + } > + > + if (opt.chroma_sub_ps[part]) > + { > + if (!check_pixel_sub_ps(ref.chroma_sub_ps[part], > opt.chroma_sub_ps[part])) > + { > + printf("chroma_sub_ps[%s] failed\n", chromaPartStr[part]); > + return false; > + } > + } > return true; > } > > @@ -968,6 +1009,18 @@ > printf("ccpy_ps[%s]", chromaPartStr[part]); > REPORT_SPEEDUP(opt.chroma_copy_ps[part], > ref.chroma_copy_ps[part], sbuf1, 64, pbuf1, 128); > } > + > + if (opt.luma_sub_ps[part]) > + { > + printf("luma_sub_ps[%s]", lumaPartStr[part]); > + REPORT_SPEEDUP(opt.luma_sub_ps[part], ref.luma_sub_ps[part], > (int16_t*)pbuf1, FENC_STRIDE, pbuf2, pbuf1, STRIDE, STRIDE); > + } > + > + if (opt.chroma_sub_ps[part]) > + { > + printf("chroma_sub_ps[%s]", chromaPartStr[part]); > + REPORT_SPEEDUP(opt.chroma_sub_ps[part], ref.chroma_sub_ps[part], > (int16_t*)pbuf1, FENC_STRIDE, 
pbuf2, pbuf1, STRIDE, STRIDE); > + } > } > > void PixelHarness::measureSpeed(const EncoderPrimitives& ref, const > EncoderPrimitives& opt) > diff -r 7a8118d07276 -r b1e0fe97bbfa source/test/pixelharness.h > --- a/source/test/pixelharness.h Tue Nov 12 17:06:34 2013 +0530 > +++ b/source/test/pixelharness.h Tue Nov 12 19:10:23 2013 +0530 > @@ -60,6 +60,8 @@ > bool check_block_copy_ps(copy_ps_t ref, copy_ps_t opt); > > bool check_blockfill_s(blockfill_s_t ref, blockfill_s_t opt); > + > + bool check_pixel_sub_ps(pixel_sub_ps_t ref, pixel_sub_ps_t opt); > public: > > PixelHarness(); > _______________________________________________ > x265-devel mailing list > [email protected] > https://mailman.videolan.org/listinfo/x265-devel > -- Steve Borho
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
