Now, instead of 3 loops over 4 blocks, there is only one. Also removed some variables that became unused because of this. --- libpostproc/postprocess_template.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-)
diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c index 9096586..794ea17 100644 --- a/libpostproc/postprocess_template.c +++ b/libpostproc/postprocess_template.c @@ -3445,10 +3445,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ // finish 1 block before the next otherwise we might have a problem // with the L1 Cache of the P4 ... or only a few blocks at a time or something for(x=0; x<width; ){ - int startx = x; int endx = FFMIN(width, x+32); - uint8_t *dstBlockStart = dstBlock; - const uint8_t *srcBlockStart = srcBlock; int qp_index = 0; for(qp_index=0; qp_index < 4; qp_index+=1){ QP = QPptr[(x+qp_index*8)>>qpHShift]; @@ -3472,7 +3469,16 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ ); #endif } + qp_index = 0; for(; x < endx; x+=BLOCK_SIZE){ + const int stride= dstStride; + av_unused uint8_t *tmpXchg; + //temporary while changing QP stuff to make things continue to work + c.QP = c.QP_block[qp_index]; + c.nonBQP = c.nonBQP_block[qp_index]; + c.pQPb = c.pQPb_block[qp_index]; + c.pQPb2 = c.pQPb2_block[qp_index++]; + prefetchnta(srcBlock + (((x>>2)&6) + copyAhead)*srcStride + 32); prefetchnta(srcBlock + (((x>>2)&6) + copyAhead+1)*srcStride + 32); prefetcht0(dstBlock + (((x>>2)&6) + copyAhead)*dstStride + 32); @@ -3496,23 +3502,6 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ /* else if(mode & CUBIC_BLEND_DEINT_FILTER) RENAME(deInterlaceBlendCubic)(dstBlock, dstStride); */ - dstBlock+=8; - srcBlock+=8; - } - - qp_index = 0; - dstBlock = dstBlockStart; - srcBlock = srcBlockStart; - - for(x = startx; x < endx; x+=BLOCK_SIZE){ - const int stride= dstStride; - av_unused uint8_t *tmpXchg; - //temporary while changing QP stuff to make things continue to work - c.QP = c.QP_block[qp_index]; - c.nonBQP = c.nonBQP_block[qp_index]; - c.pQPb = c.pQPb_block[qp_index]; - c.pQPb2 = c.pQPb2_block[qp_index++]; - /* only deblock if we have 2 
blocks */ if(y + 8 < height){ if(mode & V_X1_FILTER) -- 2.3.3 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel