# HG changeset patch # User Akil Ayyappan<a...@multicorewareinc.com> # Date 1551251102 -19800 # Wed Feb 27 12:35:02 2019 +0530 # Node ID d12a4caf7963fd47d646040689ad5f02754ad879 # Parent cb3e172a5f51c6a4bf8adb7953fe53277f5a1979 x86: ssimDistortion primitive
This patch adds AVX2 assembly for this primitive. Pushed patch to default branch of x265 repo. Thanks & Regards, Dinesh On Tue, Mar 5, 2019 at 10:03 AM Akil <a...@multicorewareinc.com> wrote: > > # HG changeset patch > # User Akil Ayyappan<a...@multicorewareinc.com> > # Date 1551251102 -19800 > # Wed Feb 27 12:35:02 2019 +0530 > # Node ID d12a4caf7963fd47d646040689ad5f02754ad879 > # Parent cb3e172a5f51c6a4bf8adb7953fe53277f5a1979 > x86: ssimDistortion primitive > > This patch adds AVX2 assembly for this primitive. > > |---------|-----------|-----------------|-----------------| > | Size |Performance|AVX2 clock cycles|CPP clock cycles | > |---------|-----------|-----------------|-----------------| > | [4x4] | 3.52x | 264.43 | 932.05 | > | [8x8] | 5.11x | 619.24 | 3163.56 | > | [16x16] | 5.44x | 2114.00 | 11490.52 | > | [32x32] | 6.01x | 7589.70 | 45608.01 | > | [64x64] | 6.70x | 27859.21 | 186634.25 | > |---------|-----------|-----------------|-----------------| > > diff -r cb3e172a5f51 -r d12a4caf7963 source/common/pixel.cpp > --- a/source/common/pixel.cpp Tue Feb 19 20:20:35 2019 +0530 > +++ b/source/common/pixel.cpp Wed Feb 27 12:35:02 2019 +0530 > @@ -934,6 +934,31 @@ > } > } > > +template<int log2TrSize> > +static void ssimDist_c(const pixel* fenc, uint32_t fStride, const pixel* > recon, intptr_t rstride, uint64_t *ssBlock, int shift, uint64_t *ac_k) > +{ > + *ssBlock = 0; > + const uint32_t trSize = 1 << log2TrSize; > + for (int y = 0; y < trSize; y++) > + { > + for (int x = 0; x < trSize; x++) > + { > + int temp = fenc[y * fStride + x] - recon[y * rstride + x]; // > copy of residual coeff > + *ssBlock += temp * temp; > + } > + } > + > + *ac_k = 0; > + for (int block_yy = 0; block_yy < trSize; block_yy += 1) > + { > + for (int block_xx = 0; block_xx < trSize; block_xx += 1) > + { > + uint32_t temp = fenc[block_yy * fStride + block_xx] >> shift; > + *ac_k += temp * temp; > + } > + } > +} > + > #if HIGH_BIT_DEPTH > static pixel planeClipAndMax_c(pixel *src, intptr_t stride, int width, > int height, uint64_t *outsum, > const pixel minPix, const pixel maxPix) > @@ -1283,5 +1308,11 @@ > p.propagateCost = estimateCUPropagateCost; > p.fix8Unpack = cuTreeFix8Unpack; > p.fix8Pack = cuTreeFix8Pack; > + > + p.cu[BLOCK_4x4].ssimDist = ssimDist_c<2>; > + p.cu[BLOCK_8x8].ssimDist = ssimDist_c<3>; > + p.cu[BLOCK_16x16].ssimDist = ssimDist_c<4>; > + p.cu[BLOCK_32x32].ssimDist = ssimDist_c<5>; > + p.cu[BLOCK_64x64].ssimDist = ssimDist_c<6>; > } > } > diff -r cb3e172a5f51 -r d12a4caf7963 source/common/primitives.h > --- a/source/common/primitives.h Tue Feb 19 20:20:35 2019 +0530 > +++ b/source/common/primitives.h Wed Feb 27 12:35:02 2019 +0530 > @@ -227,6 +227,7 @@ > typedef void(*psyRdoQuant_t)(int16_t *m_resiDctCoeff, int16_t > *m_fencDctCoeff, int64_t *costUncoded, int64_t *totalUncodedCost, int64_t > *totalRdCost, int64_t *psyScale, uint32_t blkPos); > typedef void(*psyRdoQuant_t1)(int16_t *m_resiDctCoeff, int64_t > *costUncoded, int64_t *totalUncodedCost, int64_t *totalRdCost,uint32_t > blkPos); > typedef void(*psyRdoQuant_t2)(int16_t *m_resiDctCoeff, int16_t > *m_fencDctCoeff, int64_t *costUncoded, int64_t *totalUncodedCost, int64_t > *totalRdCost, int64_t *psyScale, uint32_t blkPos); > +typedef void(*ssimDistortion_t)(const pixel *fenc, uint32_t fStride, > const pixel *recon, intptr_t rstride, uint64_t *ssBlock, int shift, > uint64_t *ac_k); > /* Function pointers to optimized encoder primitives. Each pointer can > reference > * either an assembly routine, a SIMD intrinsic primitive, or a C > function */ > struct EncoderPrimitives > @@ -303,6 +304,7 @@ > psyRdoQuant_t psyRdoQuant; > psyRdoQuant_t1 psyRdoQuant_1p; > psyRdoQuant_t2 psyRdoQuant_2p; > + ssimDistortion_t ssimDist; > } > cu[NUM_CU_SIZES]; > /* These remaining primitives work on either fixed block sizes or take > diff -r cb3e172a5f51 -r d12a4caf7963 source/common/quant.cpp > --- a/source/common/quant.cpp Tue Feb 19 20:20:35 2019 +0530 > +++ b/source/common/quant.cpp Wed Feb 27 12:35:02 2019 +0530 > @@ -501,15 +501,8 @@ > > // Calculation of (X(k) - Y(k)) * (X(k) - Y(k)), AC > ssBlock = 0; > - for (int y = 0; y < trSize; y++) > - { > - for (int x = 0; x < trSize; x++) > - { > - int temp = fenc[y * fStride + x] - recon[y * rstride + x]; // > copy of residual coeff > - ssBlock += temp * temp; > - } > - } > - > + uint64_t ac_k = 0; > + primitives.cu[log2TrSize - 2].ssimDist(fenc, fStride, recon, > rstride, &ssBlock, shift, &ac_k); > ssAc = ssBlock - ssDc; > > // 1. Calculation of fdc' > @@ -535,15 +528,6 @@ > uint64_t fAc_num = 0; > > // 2. Calculate ac component > - uint64_t ac_k = 0; > - for (int block_yy = 0; block_yy < trSize; block_yy += 1) > - { > - for (int block_xx = 0; block_xx < trSize; block_xx += 1) > - { > - uint32_t temp = fenc[block_yy * fStride + block_xx] >> shift; > - ac_k += temp * temp; > - } > - } > ac_k -= dc_k; > > double s = 1 + 0.005 * cu.m_qp[absPartIdx]; > diff -r cb3e172a5f51 -r d12a4caf7963 source/common/x86/asm-primitives.cpp > --- a/source/common/x86/asm-primitives.cpp Tue Feb 19 20:20:35 2019 +0530 > +++ b/source/common/x86/asm-primitives.cpp Wed Feb 27 12:35:02 2019 +0530 > @@ -2319,6 +2319,12 @@ > p.cu[BLOCK_16x16].psyRdoQuant_1p = PFX(psyRdoQuant_1p16_avx2); > p.cu[BLOCK_32x32].psyRdoQuant_1p = PFX(psyRdoQuant_1p32_avx2); > > + p.cu[BLOCK_4x4].ssimDist = PFX(ssimDist4_avx2); > + p.cu[BLOCK_8x8].ssimDist = PFX(ssimDist8_avx2); > + p.cu[BLOCK_16x16].ssimDist = PFX(ssimDist16_avx2); > + p.cu[BLOCK_32x32].ssimDist = PFX(ssimDist32_avx2); > + p.cu[BLOCK_64x64].ssimDist = PFX(ssimDist64_avx2); > + > /* TODO: This kernel needs to be modified to work with > HIGH_BIT_DEPTH only > p.planeClipAndMax = PFX(planeClipAndMax_avx2); */ > > @@ -4706,6 +4712,12 @@ > p.cu[BLOCK_16x16].psyRdoQuant_1p = PFX(psyRdoQuant_1p16_avx2); > p.cu[BLOCK_32x32].psyRdoQuant_1p = PFX(psyRdoQuant_1p32_avx2); > > + p.cu[BLOCK_4x4].ssimDist = PFX(ssimDist4_avx2); > + p.cu[BLOCK_8x8].ssimDist = PFX(ssimDist8_avx2); > + p.cu[BLOCK_16x16].ssimDist = PFX(ssimDist16_avx2); > + p.cu[BLOCK_32x32].ssimDist = PFX(ssimDist32_avx2); > + p.cu[BLOCK_64x64].ssimDist = PFX(ssimDist64_avx2); > + > } > if (cpuMask & X265_CPU_AVX512) > { > diff -r cb3e172a5f51 -r d12a4caf7963 source/common/x86/pixel-a.asm > --- a/source/common/x86/pixel-a.asm Tue Feb 19 20:20:35 2019 +0530 > +++ b/source/common/x86/pixel-a.asm Wed Feb 27 12:35:02 2019 +0530 > @@ -73,6 +73,16 @@ > cextern pb_movemask_32 > cextern pw_pixel_max > > +%if BIT_DEPTH == 12 > + %define SSIMRD_SHIFT 4 > +%elif BIT_DEPTH == 10 > + %define SSIMRD_SHIFT 2 > +%elif BIT_DEPTH == 8 > + %define SSIMRD_SHIFT 0 > +%else > + %error Unsupported BIT_DEPTH! > +%endif > + > > > ;============================================================================= > ; SATD > > > ;============================================================================= > @@ -360,6 +370,24 @@ > RET > %endmacro > > +%macro SSIM_RD_COL 2 > + vpsrld m6, m0, SSIMRD_SHIFT > + vpsubd m0, m1 > + > + vpmuldq m2, m0, m0 > + vpsrldq m0, m0, 4 > + vpmuldq m0, m0, m0 > + vpaddq m0, m2 > + > + vpmuldq m2, m6, m6 > + vpsrldq m6, m6, 4 > + vpmuldq m6, m6, m6 > + vpaddq m6, m2 > + > + vpaddq m4, m0 > + vpaddq m7, m6 > +%endmacro > + > ; FIXME avoid the spilling of regs to hold 3*stride. > ; for small blocks on x86_32, modify pixel pointer instead. > > @@ -15883,3 +15911,395 @@ > RET > %endif > %endif ; HIGH_BIT_DEPTH == 1 && BIT_DEPTH == 10 > + > +;template<int log2TrSize> > +;static void ssimDist_c(const pixel* fenc, uint32_t fStride, const pixel* > recon, intptr_t rstride, uint64_t *ssBlock, int shift, uint64_t *ac_k) > +;{ > +; *ssBlock = 0; > +; const uint32_t trSize = 1 << log2TrSize; > +; for (int y = 0; y < trSize; y++) > +; { > +; for (int x = 0; x < trSize; x++) > +; { > +; int temp = fenc[y * fStride + x] - recon[y * rstride + x]; > // copy of residual coeff > +; *ssBlock += temp * temp; > +; } > +; } > +; > +; *ac_k = 0; > +; for (int block_yy = 0; block_yy < trSize; block_yy += 1) > +; { > +; for (int block_xx = 0; block_xx < trSize; block_xx += 1) > +; { > +; uint32_t temp = fenc[block_yy * fStride + block_xx] >> shift; > +; *ac_k += temp * temp; > +; } > +; } > +;} > > +;----------------------------------------------------------------------------------------------------------------- > +; void ssimDist_c(const pixel* fenc, uint32_t fStride, const pixel* > recon, intptr_t rstride, uint64_t *ssBlock, int shift, uint64_t *ac_k) > > +;----------------------------------------------------------------------------------------------------------------- > + > +INIT_YMM avx2 > +cglobal ssimDist4, 7, 8, 8 > + mov r7d, 4 > + vpxor m4, m4 ;ssBlock > + vpxor m3, m3 > + vpxor m7, m7 ;ac_k > +.row: > +%if HIGH_BIT_DEPTH > + vpmovzxwq m0, [r0] ;fenc > + vpmovzxwq m1, [r2] ;recon > +%elif BIT_DEPTH == 8 > + vpmovzxbq m0, [r0] > + vpmovzxbq m1, [r2] > +%else > + %error Unsupported BIT_DEPTH! > +%endif > + vpsrlq m6, m0, SSIMRD_SHIFT > + vpsubq m0, m1 > + vpmuldq m0, m0, m0 > + vpmuldq m6, m6, m6 > + vpaddq m4, m0 > + vpaddq m7, m6 > + > +%if HIGH_BIT_DEPTH > + lea r0, [r0 + 2 * r1] > + lea r2, [r2 + 2 * r3] > +%else > + lea r0, [r0 + r1] > + lea r2, [r2 + r3] > +%endif > + dec r7d > + jnz .row > + vextracti128 xm5, m4, 1 > + vpaddq xm4, xm5 > + punpckhqdq xm2, xm4, xm3 > + paddq xm4, xm2 > + > + vextracti128 xm5, m7, 1 > + vpaddq xm7, xm5 > + punpckhqdq xm2, xm7, xm3 > + paddq xm7, xm2 > + > + movq [r4], xm4 > + movq [r6], xm7 > + RET > + > + > +INIT_YMM avx2 > +cglobal ssimDist8, 7, 8, 8 > + mov r7d, 8 > + vpxor m4, m4 ;ssBlock > + vpxor m3, m3 > + vpxor m7, m7 ;ac_k > +.row: > +%if HIGH_BIT_DEPTH > + vpmovzxwd m0, [r0] ;fenc > + vpmovzxwd m1, [r2] ;recon > +%elif BIT_DEPTH == 8 > + vpmovzxbd m0, [r0] > + vpmovzxbd m1, [r2] > +%else > + %error Unsupported BIT_DEPTH! > +%endif > + > + SSIM_RD_COL m0, m1 > + > +%if HIGH_BIT_DEPTH > + lea r0, [r0 + 2 * r1] > + lea r2, [r2 + 2 * r3] > +%else > + lea r0, [r0 + r1] > + lea r2, [r2 + r3] > +%endif > + dec r7d > + jnz .row > + vextracti128 xm5, m4, 1 > + vpaddq xm4, xm5 > + punpckhqdq xm2, xm4, xm3 > + paddq xm4, xm2 > + > + vextracti128 xm5, m7, 1 > + vpaddq xm7, xm5 > + punpckhqdq xm2, xm7, xm3 > + paddq xm7, xm2 > + > + movq [r4], xm4 > + movq [r6], xm7 > + RET > + > + > +INIT_YMM avx2 > +cglobal ssimDist16, 7, 8, 8 > + mov r7d, 16 > + vpxor m4, m4 ;ssBlock > + vpxor m3, m3 > + vpxor m7, m7 ;ac_k > +.row: > +;Col 1-8 > +%if HIGH_BIT_DEPTH > + vpmovzxwd m0, [r0] ;fenc > + vpmovzxwd m1, [r2] ;recon > +%elif BIT_DEPTH == 8 > + vpmovzxbd m0, [r0] > + vpmovzxbd m1, [r2] > +%else > + %error Unsupported BIT_DEPTH! > +%endif > + > + SSIM_RD_COL m0, m1 > + > +;Col 9-16 > +%if HIGH_BIT_DEPTH > + vpmovzxwd m0, [r0 + 16] ;fenc > + vpmovzxwd m1, [r2 + 16] ;recon > +%elif BIT_DEPTH == 8 > + vpmovzxbd m0, [r0 + 8] > + vpmovzxbd m1, [r2 + 8] > +%else > + %error Unsupported BIT_DEPTH! > +%endif > + > + SSIM_RD_COL m0, m1 > + > +%if HIGH_BIT_DEPTH > + lea r0, [r0 + 2 * r1] > + lea r2, [r2 + 2 * r3] > +%else > + lea r0, [r0 + r1] > + lea r2, [r2 + r3] > +%endif > + dec r7d > + jnz .row > + vextracti128 xm5, m4, 1 > + vpaddq xm4, xm5 > + punpckhqdq xm2, xm4, xm3 > + paddq xm4, xm2 > + > + vextracti128 xm5, m7, 1 > + vpaddq xm7, xm5 > + punpckhqdq xm2, xm7, xm3 > + paddq xm7, xm2 > + > + movq [r4], xm4 > + movq [r6], xm7 > + RET > + > + > +INIT_YMM avx2 > +cglobal ssimDist32, 7, 8, 8 > + mov r7d, 32 > + vpxor m4, m4 ;ssBlock > + vpxor m3, m3 > + vpxor m7, m7 ;ac_k > +.row: > +;Col 1-8 > +%if HIGH_BIT_DEPTH > + vpmovzxwd m0, [r0] ;fenc > + vpmovzxwd m1, [r2] ;recon > +%elif BIT_DEPTH == 8 > + vpmovzxbd m0, [r0] > + vpmovzxbd m1, [r2] > +%else > + %error Unsupported BIT_DEPTH! > +%endif > + > + SSIM_RD_COL m0, m1 > + > +;Col 9-16 > +%if HIGH_BIT_DEPTH > + vpmovzxwd m0, [r0 + 16] ;fenc > + vpmovzxwd m1, [r2 + 16] ;recon > +%elif BIT_DEPTH == 8 > + vpmovzxbd m0, [r0 + 8] > + vpmovzxbd m1, [r2 + 8] > +%else > + %error Unsupported BIT_DEPTH! > +%endif > + > + SSIM_RD_COL m0, m1 > + > +;Col 17-24 > +%if HIGH_BIT_DEPTH > + vpmovzxwd m0, [r0 + 32] ;fenc > + vpmovzxwd m1, [r2 + 32] ;recon > +%elif BIT_DEPTH == 8 > + vpmovzxbd m0, [r0 + 16] > + vpmovzxbd m1, [r2 + 16] > +%else > + %error Unsupported BIT_DEPTH! > +%endif > + > + SSIM_RD_COL m0, m1 > + > +;Col 25-32 > +%if HIGH_BIT_DEPTH > + vpmovzxwd m0, [r0 + 48] ;fenc > + vpmovzxwd m1, [r2 + 48] ;recon > +%elif BIT_DEPTH == 8 > + vpmovzxbd m0, [r0 + 24] > + vpmovzxbd m1, [r2 + 24] > +%else > + %error Unsupported BIT_DEPTH! > +%endif > + > + SSIM_RD_COL m0, m1 > + > +%if HIGH_BIT_DEPTH > + lea r0, [r0 + 2 * r1] > + lea r2, [r2 + 2 * r3] > +%else > + lea r0, [r0 + r1] > + lea r2, [r2 + r3] > +%endif > + dec r7d > + jnz .row > + vextracti128 xm5, m4, 1 > + vpaddq xm4, xm5 > + punpckhqdq xm2, xm4, xm3 > + paddq xm4, xm2 > + > + vextracti128 xm5, m7, 1 > + vpaddq xm7, xm5 > + punpckhqdq xm2, xm7, xm3 > + paddq xm7, xm2 > + > + movq [r4], xm4 > + movq [r6], xm7 > + RET > + > + > +INIT_YMM avx2 > +cglobal ssimDist64, 7, 8, 8 > + mov r7d, 64 > + vpxor m4, m4 ;ssBlock > + vpxor m3, m3 > + vpxor m7, m7 ;ac_k > +.row: > +;Col 1-8 > +%if HIGH_BIT_DEPTH > + vpmovzxwd m0, [r0] ;fenc > + vpmovzxwd m1, [r2] ;recon > +%elif BIT_DEPTH == 8 > + vpmovzxbd m0, [r0] > + vpmovzxbd m1, [r2] > +%else > + %error Unsupported BIT_DEPTH! > +%endif > + > + SSIM_RD_COL m0, m1 > + > +;Col 9-16 > +%if HIGH_BIT_DEPTH > + vpmovzxwd m0, [r0 + 16] ;fenc > + vpmovzxwd m1, [r2 + 16] ;recon > +%elif BIT_DEPTH == 8 > + vpmovzxbd m0, [r0 + 8] > + vpmovzxbd m1, [r2 + 8] > +%else > + %error Unsupported BIT_DEPTH! > +%endif > + > + SSIM_RD_COL m0, m1 > + > +;Col 17-24 > +%if HIGH_BIT_DEPTH > + vpmovzxwd m0, [r0 + 32] ;fenc > + vpmovzxwd m1, [r2 + 32] ;recon > +%elif BIT_DEPTH == 8 > + vpmovzxbd m0, [r0 + 16] > + vpmovzxbd m1, [r2 + 16] > +%else > + %error Unsupported BIT_DEPTH! > +%endif > + > + SSIM_RD_COL m0, m1 > + > +;Col 25-32 > +%if HIGH_BIT_DEPTH > + vpmovzxwd m0, [r0 + 48] ;fenc > + vpmovzxwd m1, [r2 + 48] ;recon > +%elif BIT_DEPTH == 8 > + vpmovzxbd m0, [r0 + 24] > + vpmovzxbd m1, [r2 + 24] > +%else > + %error Unsupported BIT_DEPTH! > +%endif > + > + SSIM_RD_COL m0, m1 > + > +;Col 33-40 > +%if HIGH_BIT_DEPTH > + vpmovzxwd m0, [r0 + 64] ;fenc > + vpmovzxwd m1, [r2 + 64] ;recon > +%elif BIT_DEPTH == 8 > + vpmovzxbd m0, [r0 + 32] > + vpmovzxbd m1, [r2 + 32] > +%else > + %error Unsupported BIT_DEPTH! > +%endif > + > + SSIM_RD_COL m0, m1 > + > +;Col 41-48 > +%if HIGH_BIT_DEPTH > + vpmovzxwd m0, [r0 + 80] ;fenc > + vpmovzxwd m1, [r2 + 80] ;recon > +%elif BIT_DEPTH == 8 > + vpmovzxbd m0, [r0 + 40] > + vpmovzxbd m1, [r2 + 40] > +%else > + %error Unsupported BIT_DEPTH! > +%endif > + > + SSIM_RD_COL m0, m1 > + > +;Col 49-56 > +%if HIGH_BIT_DEPTH > + vpmovzxwd m0, [r0 + 96] ;fenc > + vpmovzxwd m1, [r2 + 96] ;recon > +%elif BIT_DEPTH == 8 > + vpmovzxbd m0, [r0 + 48] > + vpmovzxbd m1, [r2 + 48] > +%else > + %error Unsupported BIT_DEPTH! > +%endif > + > + SSIM_RD_COL m0, m1 > + > +;Col 57-64 > +%if HIGH_BIT_DEPTH > + vpmovzxwd m0, [r0 + 112] ;fenc > + vpmovzxwd m1, [r2 + 112] ;recon > +%elif BIT_DEPTH == 8 > + vpmovzxbd m0, [r0 + 56] > + vpmovzxbd m1, [r2 + 56] > +%else > + %error Unsupported BIT_DEPTH! > +%endif > + > + SSIM_RD_COL m0, m1 > + > +%if HIGH_BIT_DEPTH > + lea r0, [r0 + 2 * r1] > + lea r2, [r2 + 2 * r3] > +%else > + lea r0, [r0 + r1] > + lea r2, [r2 + r3] > +%endif > + dec r7d > + jnz .row > + vextracti128 xm5, m4, 1 > + vpaddq xm4, xm5 > + punpckhqdq xm2, xm4, xm3 > + paddq xm4, xm2 > + > + vextracti128 xm5, m7, 1 > + vpaddq xm7, xm5 > + punpckhqdq xm2, xm7, xm3 > + paddq xm7, xm2 > + > + movq [r4], xm4 > + movq [r6], xm7 > + RET > diff -r cb3e172a5f51 -r d12a4caf7963 source/common/x86/pixel.h > --- a/source/common/x86/pixel.h Tue Feb 19 20:20:35 2019 +0530 > +++ b/source/common/x86/pixel.h Wed Feb 27 12:35:02 2019 +0530 > @@ -60,7 +60,8 @@ > FUNCDEF_TU_S(sse_t, pixel_ssd_s_aligned, cpu, const int16_t*, > intptr_t); \ > FUNCDEF_TU(uint64_t, pixel_var, cpu, const pixel*, intptr_t); \ > FUNCDEF_TU(int, psyCost_pp, cpu, const pixel* source, intptr_t > sstride, const pixel* recon, intptr_t rstride); \ > - FUNCDEF_TU(int, psyCost_ss, cpu, const int16_t* source, intptr_t > sstride, const int16_t* recon, intptr_t rstride) > + FUNCDEF_TU(int, psyCost_ss, cpu, const int16_t* source, intptr_t > sstride, const int16_t* recon, intptr_t rstride); \ > + FUNCDEF_TU_S2(void, ssimDist, cpu, const pixel *fenc, uint32_t > fStride, const pixel *recon, intptr_t rstride, uint64_t *ssBlock, int > shift, uint64_t *ac_k) > > DECL_PIXELS(mmx); > DECL_PIXELS(mmx2); > diff -r cb3e172a5f51 -r d12a4caf7963 source/test/pixelharness.cpp > --- a/source/test/pixelharness.cpp Tue Feb 19 20:20:35 2019 +0530 > +++ b/source/test/pixelharness.cpp Wed Feb 27 12:35:02 2019 +0530 > @@ -2270,6 +2270,32 @@ > return true; > } > > +bool PixelHarness::check_ssimDist(ssimDistortion_t ref, ssimDistortion_t > opt) > +{ > + uint32_t srcStride[5] = { 4, 8, 16, 32, 64 }; > + intptr_t dstStride[5] = { 4, 8, 16, 32, 64 }; > + int shift = X265_DEPTH - 8; > + uint64_t opt_dest1 = 0, ref_dest1 = 0, opt_dest2 = 0, ref_dest2 = 0; > + int j = 0; > + > + for (int i = 0; i < ITERS; i++) > + { > + int index = i % TEST_CASES; > + int k1 = rand() % 5, k2 = rand() % 5; > + ref(pixel_test_buff[index] + j, srcStride[k1], > pixel_test_buff[index + 10] + j, dstStride[k2], &ref_dest1, shift, > &ref_dest2); > + opt(pixel_test_buff[index] + j, srcStride[k1], > pixel_test_buff[index + 10] + j, dstStride[k2], &opt_dest1, shift, > &opt_dest2); > + > + if (opt_dest1 != ref_dest1 && opt_dest2 != ref_dest2) > + { > + return false; > + } > + > + reportfail() > + j += INCR; > + } > + return true; > +} > + > bool PixelHarness::testPU(int part, const EncoderPrimitives& ref, const > EncoderPrimitives& opt) > { > if (opt.pu[part].satd) > @@ -2607,6 +2633,15 @@ > } > } > > + if (opt.cu[i].ssimDist) > + { > + if (!check_ssimDist(ref.cu[i].ssimDist, opt.cu[i].ssimDist)) > + { > + printf("\nssimDist[%dx%d] failed!\n", 4 << i, 4 << i); > + return false; > + } > + } > + > if (i < BLOCK_64x64) > { > /* TU only primitives */ > @@ -3093,6 +3128,7 @@ > return false; > } > } > + > return true; > } > > @@ -3392,6 +3428,14 @@ > HEADER("psy_cost_pp[%dx%d]", 4 << i, 4 << i); > REPORT_SPEEDUP(opt.cu[i].psy_cost_pp, ref.cu[i].psy_cost_pp, > pbuf1, STRIDE, pbuf2, STRIDE); > } > + > + if (opt.cu[i].ssimDist) > + { > + uint64_t dst1 = 0, dst2 = 0; > + int shift = X265_DEPTH - 8; > + printf("ssimDist[%dx%d]", 4 << i, 4 << i); > + REPORT_SPEEDUP(opt.cu[i].ssimDist, ref.cu[i].ssimDist, > pixel_test_buff[0], 32, pixel_test_buff[5], 64, &dst1, shift, &dst2); > + } > } > > if (opt.weight_pp) > diff -r cb3e172a5f51 -r d12a4caf7963 source/test/pixelharness.h > --- a/source/test/pixelharness.h Tue Feb 19 20:20:35 2019 +0530 > +++ b/source/test/pixelharness.h Wed Feb 27 12:35:02 2019 +0530 > @@ -136,6 +136,7 @@ > bool check_pelFilterChroma_H(pelFilterChroma_t ref, pelFilterChroma_t > opt); > bool check_integral_initv(integralv_t ref, integralv_t opt); > bool check_integral_inith(integralh_t ref, integralh_t opt); > + bool check_ssimDist(ssimDistortion_t ref, ssimDistortion_t opt); > > public: > > > -- > *Regards,* > *Akil R* > _______________________________________________ > x265-devel mailing list > x265-devel@videolan.org > https://mailman.videolan.org/listinfo/x265-devel >
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel