--- libpostproc/postprocess_template.c | 296 +++++++++++++++++++------------------ 1 file changed, 152 insertions(+), 144 deletions(-)
diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c index 8220d36..866ba8f 100644 --- a/libpostproc/postprocess_template.c +++ b/libpostproc/postprocess_template.c @@ -3245,8 +3245,10 @@ static inline void RENAME(duplicate)(uint8_t src[], int stride) /** * Filter array of bytes (Y or U or V values) */ -static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, - const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2) +static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[], + int dstStride, int width, int height, + const QP_STORE_T QPs[], int QPStride, + int isColor, PPContext *c2) { DECLARE_ALIGNED(8, PPContext, c)= *c2; //copy to stack for faster access int x,y; @@ -3374,7 +3376,8 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ prefetcht0(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32); RENAME(blockCopy)(dstBlock + dstStride*8, dstStride, - srcBlock + srcStride*8, srcStride, mode & LEVEL_FIX, &c.packedYOffset); + srcBlock + srcStride*8, srcStride, + mode & LEVEL_FIX, &c.packedYOffset); RENAME(duplicate)(dstBlock + dstStride*8, dstStride); @@ -3389,7 +3392,8 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ else if(mode & FFMPEG_DEINT_FILTER) RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x); else if(mode & LOWPASS5_DEINT_FILTER) - RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x); + RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, + c.deintTemp + width + x); /* else if(mode & CUBIC_BLEND_DEINT_FILTER) RENAME(deInterlaceBlendCubic)(dstBlock, dstStride); */ @@ -3453,167 +3457,170 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ for(qp_index=0; qp_index < (endx-startx)/BLOCK_SIZE; qp_index++){ QP = QPptr[(x+qp_index*BLOCK_SIZE)>>qpHShift]; nonBQP = nonBQPptr[(x+qp_index*BLOCK_SIZE)>>qpHShift]; - if(!isColor){ - QP= (QP* QPCorrecture + 256*128)>>16; - nonBQP= (nonBQP* QPCorrecture + 256*128)>>16; - yHistogram[srcBlock[srcStride*12 + 4]]++; - } - c.QP_block[qp_index] = QP; - c.nonBQP_block[qp_index] = nonBQP; + if(!isColor){ + QP= (QP* QPCorrecture + 256*128)>>16; + nonBQP= (nonBQP* QPCorrecture + 256*128)>>16; + yHistogram[srcBlock[srcStride*12 + 4]]++; + } + c.QP_block[qp_index] = QP; + c.nonBQP_block[qp_index] = nonBQP; + #if TEMPLATE_PP_MMX - __asm__ volatile( - "movd %1, %%mm7 \n\t" - "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP - "packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP - "packuswb %%mm7, %%mm7 \n\t" // QP,..., QP - "movq %%mm7, %0 \n\t" - : "=m" (c.pQPb_block[qp_index]) - : "r" (QP) - ); + __asm__ volatile( + "movd %1, %%mm7 \n\t" + "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP + "packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP + "packuswb %%mm7, %%mm7 \n\t" // QP,..., QP + "movq %%mm7, %0 \n\t" + : "=m" (c.pQPb_block[qp_index]) + : "r" (QP) + ); #endif } - for(; x < endx; x+=BLOCK_SIZE){ - prefetchnta(srcBlock + (((x>>2)&6) + copyAhead)*srcStride + 32); - prefetchnta(srcBlock + (((x>>2)&6) + copyAhead+1)*srcStride + 32); - prefetcht0(dstBlock + (((x>>2)&6) + copyAhead)*dstStride + 32); - prefetcht0(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32); - - RENAME(blockCopy)(dstBlock + dstStride*copyAhead, dstStride, - srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX, &c.packedYOffset); - if(mode & LINEAR_IPOL_DEINT_FILTER) - RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride); - else if(mode & LINEAR_BLEND_DEINT_FILTER) - RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x); - else if(mode & MEDIAN_DEINT_FILTER) - RENAME(deInterlaceMedian)(dstBlock, dstStride); - else if(mode & CUBIC_IPOL_DEINT_FILTER) - RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride); - else if(mode & FFMPEG_DEINT_FILTER) - RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x); - else if(mode & LOWPASS5_DEINT_FILTER) - RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x); -/* else if(mode & CUBIC_BLEND_DEINT_FILTER) - RENAME(deInterlaceBlendCubic)(dstBlock, dstStride); -*/ - dstBlock+=8; - srcBlock+=8; - } + for(; x < endx; x+=BLOCK_SIZE){ + prefetchnta(srcBlock + (((x>>2)&6) + copyAhead)*srcStride + 32); + prefetchnta(srcBlock + (((x>>2)&6) + copyAhead+1)*srcStride + 32); + prefetcht0(dstBlock + (((x>>2)&6) + copyAhead)*dstStride + 32); + prefetcht0(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32); + + RENAME(blockCopy)(dstBlock + dstStride*copyAhead, dstStride, + srcBlock + srcStride*copyAhead, srcStride, + mode & LEVEL_FIX, &c.packedYOffset); + + if(mode & LINEAR_IPOL_DEINT_FILTER) + RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride); + else if(mode & LINEAR_BLEND_DEINT_FILTER) + RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x); + else if(mode & MEDIAN_DEINT_FILTER) + RENAME(deInterlaceMedian)(dstBlock, dstStride); + else if(mode & CUBIC_IPOL_DEINT_FILTER) + RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride); + else if(mode & FFMPEG_DEINT_FILTER) + RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x); + else if(mode & LOWPASS5_DEINT_FILTER) + RENAME(deInterlaceL5)(dstBlock, dstStride, + c.deintTemp + x, c.deintTemp + width + x); + /* else if(mode & CUBIC_BLEND_DEINT_FILTER) + RENAME(deInterlaceBlendCubic)(dstBlock, dstStride); + */ + dstBlock+=8; + srcBlock+=8; + } dstBlock = dstBlockStart; srcBlock = srcBlockStart; for(x = startx, qp_index = 0; x < endx; x+=BLOCK_SIZE, qp_index++){ - const int stride= dstStride; - //temporary while changing QP stuff to make things continue to work - //eventually QP,nonBQP,etc will be arrays and this will be unnecessary - c.QP = c.QP_block[qp_index]; - c.nonBQP = c.nonBQP_block[qp_index]; - c.pQPb = c.pQPb_block[qp_index]; - c.pQPb2 = c.pQPb2_block[qp_index]; - - - /* only deblock if we have 2 blocks */ - if(y + 8 < height){ - if(mode & V_X1_FILTER) - RENAME(vertX1Filter)(dstBlock, stride, &c); - else if(mode & V_DEBLOCK){ - const int t= RENAME(vertClassify)(dstBlock, stride, &c); - - if(t==1) - RENAME(doVertLowPass)(dstBlock, stride, &c); - else if(t==2) - RENAME(doVertDefFilter)(dstBlock, stride, &c); - }else if(mode & V_A_DEBLOCK){ - RENAME(do_a_deblock)(dstBlock, stride, 1, &c, mode); - } - } - - dstBlock+=8; - srcBlock+=8; + const int stride= dstStride; + //temporary while changing QP stuff to make things continue to work + //eventually QP,nonBQP,etc will be arrays and this will be unnecessary + c.QP = c.QP_block[qp_index]; + c.nonBQP = c.nonBQP_block[qp_index]; + c.pQPb = c.pQPb_block[qp_index]; + c.pQPb2 = c.pQPb2_block[qp_index]; + + + /* only deblock if we have 2 blocks */ + if(y + 8 < height){ + if(mode & V_X1_FILTER){ + RENAME(vertX1Filter)(dstBlock, stride, &c); + } else if(mode & V_DEBLOCK){ + const int t= RENAME(vertClassify)(dstBlock, stride, &c); + + if(t==1) + RENAME(doVertLowPass)(dstBlock, stride, &c); + else if(t==2) + RENAME(doVertDefFilter)(dstBlock, stride, &c); + } else if(mode & V_A_DEBLOCK){ + RENAME(do_a_deblock)(dstBlock, stride, 1, &c, mode); + } + + dstBlock+=8; + srcBlock+=8; + } } dstBlock = dstBlockStart; srcBlock = srcBlockStart; for(x = startx, qp_index=0; x < endx; x+=BLOCK_SIZE, qp_index++){ - const int stride= dstStride; - av_unused uint8_t *tmpXchg; - c.QP = c.QP_block[qp_index]; - c.nonBQP = c.nonBQP_block[qp_index]; - c.pQPb = c.pQPb_block[qp_index]; - c.pQPb2 = c.pQPb2_block[qp_index]; + const int stride= dstStride; + av_unused uint8_t *tmpXchg; + c.QP = c.QP_block[qp_index]; + c.nonBQP = c.nonBQP_block[qp_index]; + c.pQPb = c.pQPb_block[qp_index]; + c.pQPb2 = c.pQPb2_block[qp_index]; #if TEMPLATE_PP_MMX - RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride); + RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride); #endif - /* check if we have a previous block to deblock it with dstBlock */ - if(x - 8 >= 0){ + /* check if we have a previous block to deblock it with dstBlock */ + if(x - 8 >= 0){ #if TEMPLATE_PP_MMX - if(mode & H_X1_FILTER) - RENAME(vertX1Filter)(tempBlock1, 16, &c); - else if(mode & H_DEBLOCK){ - const int t= RENAME(vertClassify)(tempBlock1, 16, &c); - if(t==1) - RENAME(doVertLowPass)(tempBlock1, 16, &c); - else if(t==2) - RENAME(doVertDefFilter)(tempBlock1, 16, &c); - }else if(mode & H_A_DEBLOCK){ - RENAME(do_a_deblock)(tempBlock1, 16, 1, &c, mode); - } - - RENAME(transpose2)(dstBlock-4, dstStride, tempBlock1 + 4*16); + if(mode & H_X1_FILTER){ + RENAME(vertX1Filter)(tempBlock1, 16, &c); + } else if(mode & H_DEBLOCK){ + const int t= RENAME(vertClassify)(tempBlock1, 16, &c); + if(t==1) + RENAME(doVertLowPass)(tempBlock1, 16, &c); + else if(t==2) + RENAME(doVertDefFilter)(tempBlock1, 16, &c); + } else if(mode & H_A_DEBLOCK){ + RENAME(do_a_deblock)(tempBlock1, 16, 1, &c, mode); + } + + RENAME(transpose2)(dstBlock-4, dstStride, tempBlock1 + 4*16); #else - if(mode & H_X1_FILTER) - horizX1Filter(dstBlock-4, stride, c.QP); - else if(mode & H_DEBLOCK){ + if(mode & H_X1_FILTER){ + horizX1Filter(dstBlock-4, stride, c.QP); + } else if(mode & H_DEBLOCK){ #if TEMPLATE_PP_ALTIVEC - DECLARE_ALIGNED(16, unsigned char, tempBlock)[272]; - int t; - transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride); - - t = vertClassify_altivec(tempBlock-48, 16, &c); - if(t==1) { - doVertLowPass_altivec(tempBlock-48, 16, &c); - transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride); - } - else if(t==2) { - doVertDefFilter_altivec(tempBlock-48, 16, &c); - transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride); - } + DECLARE_ALIGNED(16, unsigned char, tempBlock)[272]; + int t; + transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride); + + t = vertClassify_altivec(tempBlock-48, 16, &c); + if(t==1) { + doVertLowPass_altivec(tempBlock-48, 16, &c); + transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride); + } + else if(t==2) { + doVertDefFilter_altivec(tempBlock-48, 16, &c); + transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride); + } #else - const int t= RENAME(horizClassify)(dstBlock-4, stride, &c); + const int t= RENAME(horizClassify)(dstBlock-4, stride, &c); - if(t==1) - RENAME(doHorizLowPass)(dstBlock-4, stride, &c); - else if(t==2) - RENAME(doHorizDefFilter)(dstBlock-4, stride, &c); + if(t==1) + RENAME(doHorizLowPass)(dstBlock-4, stride, &c); + else if(t==2) + RENAME(doHorizDefFilter)(dstBlock-4, stride, &c); #endif - }else if(mode & H_A_DEBLOCK){ - RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c, mode); - } + } else if(mode & H_A_DEBLOCK){ + RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c, mode); + } #endif //TEMPLATE_PP_MMX - if(mode & DERING){ - //FIXME filter first line - if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, &c); - } - - if(mode & TEMP_NOISE_FILTER) - { - RENAME(tempNoiseReducer)(dstBlock-8, stride, - c.tempBlurred[isColor] + y*dstStride + x, - c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256, - c.ppMode.maxTmpNoise); - } - } - - dstBlock+=8; - srcBlock+=8; + if(mode & DERING){ + //FIXME filter first line + if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, &c); + } + + if(mode & TEMP_NOISE_FILTER){ + RENAME(tempNoiseReducer)(dstBlock-8, stride, + c.tempBlurred[isColor] + y*dstStride + x, + c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256, + c.ppMode.maxTmpNoise); + } + } + + dstBlock+=8; + srcBlock+=8; #if TEMPLATE_PP_MMX - tmpXchg= tempBlock1; - tempBlock1= tempBlock2; - tempBlock2 = tmpXchg; + tmpXchg= tempBlock1; + tempBlock1= tempBlock2; + tempBlock2 = tmpXchg; #endif } } @@ -3624,9 +3631,9 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ if((mode & TEMP_NOISE_FILTER)){ RENAME(tempNoiseReducer)(dstBlock-8, dstStride, - c.tempBlurred[isColor] + y*dstStride + x, - c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256, - c.ppMode.maxTmpNoise); + c.tempBlurred[isColor] + y*dstStride + x, + c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256, + c.ppMode.maxTmpNoise); } /* did we use a tmp buffer for the last lines*/ @@ -3661,12 +3668,12 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ int end=yHistogram[i]/(max/256+1); int inc= end > start ? 1 : -1; for(x=start; x!=end+inc; x+=inc) - dst[ i*dstStride + x]+=128; + dst[i*dstStride + x]+=128; } for(i=0; i<100; i+=2){ - dst[ (white)*dstStride + i]+=128; - dst[ (black)*dstStride + i]+=128; + dst[(white)*dstStride + i]+=128; + dst[(black)*dstStride + i]+=128; } } #endif @@ -3679,6 +3686,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ #undef TEMPLATE_PP_C #undef TEMPLATE_PP_ALTIVEC #undef TEMPLATE_PP_MMX + #undef TEMPLATE_PP_MMXEXT #undef TEMPLATE_PP_3DNOW #undef TEMPLATE_PP_SSE2 -- 2.3.3 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel