On Tue, Nov 12, 2013 at 5:49 AM, <[email protected]> wrote:
> # HG changeset patch > # User Shazeb Nawaz Khan <[email protected]> > # Date 1384256247 -19800 > # Tue Nov 12 17:07:27 2013 +0530 > # Node ID 21596a519ba8cc521dbc81f693c867cbca03fd3f > # Parent e7319fd46128b3bfcc826ea9be02896b316ed966 > Pulling x264 weight decision into/for x265 lookahead > > diff -r e7319fd46128 -r 21596a519ba8 source/encoder/slicetype.cpp > --- a/source/encoder/slicetype.cpp Tue Nov 12 17:06:03 2013 +0530 > +++ b/source/encoder/slicetype.cpp Tue Nov 12 17:07:27 2013 +0530 > @@ -45,6 +45,14 @@ > > using namespace x265; > > +#define SET_WEIGHT( w, b, s, d, o )\ > +{\ > + (w).inputWeight = (s);\ > + (w).log2WeightDenom = (d);\ > + (w).inputOffset = (o);\ > + (w).bPresentFlag = b;\ > +} > + > static inline int16_t median(int16_t a, int16_t b, int16_t c) > { > int16_t t = (a - b) & ((a - b) >> 31); > @@ -190,16 +198,329 @@ > return pic->m_lowres.satdCost; > } > > +/* makes a non-h265 weight (i.e. fix7), into an h265 weight */ > +static void x265_weight_get_h265( int weight_nonh264, int offset, > wpScalingParam *w ) > this should be made into a wpScalingParam method.. something like setFromWeightAndOffset() > +{ > + w->inputOffset = offset; > + w->log2WeightDenom = 7; > + w->inputWeight = weight_nonh264; > + while( w->log2WeightDenom > 0 && (w->inputWeight > 127) ) > + { > + w->log2WeightDenom--; > + w->inputWeight >>= 1; > + } > + w->inputWeight = X265_MIN( w->inputWeight, 127 ); > +} > + > +pixel* Lookahead::x265_weight_cost_init_luma( int b, int p0, pixel *dest ) > weightCostInit() white-space > +{ > + Lowres *fenc, *ref; > + fenc = frames[b]; > + ref = frames[p0]; > + int ref0_distance = b - p0 - 1; > + /* Note: this will never run during lookahead as weights_analyse is > only called if no > + * motion search has been done. 
*/ > + if( fenc->lowresMvs[0][ref0_distance][0].x != 0x7FFF ) > + { > + int i_stride = fenc->lumaStride; > + int i_lines = fenc->lines; > + int i_width = fenc->width; > + int i_mb_xy = 0; > + pixel *p = dest; > + > + for( int y = 0; y < i_lines; y += 8, p += i_stride*8 ) > + for( int x = 0; x < i_width; x += 8, i_mb_xy++ ) > + { > + int mvx = fenc->lowresMvs[0][ref0_distance][i_mb_xy].x; > + int mvy = fenc->lowresMvs[0][ref0_distance][i_mb_xy].y; > + mvx;mvy; > + //h->mc.mc_luma( p+x, i_stride, ref->lowresPlane, > i_stride, > + // mvx+(x<<2), mvy+(y<<2), 8, 8, > x264_weight_none ); > + } > + x265_emms(); > + return dest; > + } > + x265_emms(); > + return ref->lowresPlane[0]; > +} > this whole function is not used for lookahead and should be removed, not to mention it is unfinished > + > +static void mc_weight( pixel *dst, intptr_t i_dst_stride, pixel *src, > intptr_t i_src_stride, > + const wpScalingParam *weight, int i_width, int > i_height ) > white-space, do not copy i_ prefixes from x264 > +{ > + int offset = weight->inputOffset << (g_bitDepth - 8); > use X265_DEPTH, not g_bitDepth > + int scale = weight->inputWeight; > + int denom = weight->log2WeightDenom; > + int corection = (IF_INTERNAL_PREC - X265_DEPTH); > correction > + if( denom >= 1 ) > white-space.. 
please run uncrustify on the file before re-submitting > + { > + primitives.weightpUniPixel(src, dst, i_src_stride, i_dst_stride, > i_width, i_height, scale, (1<<(denom - 1 + corection)), (denom + > corection), offset); > + } > + else > + { > + primitives.weightpUniPixel(src, dst, i_src_stride, i_dst_stride, > i_width, i_height, scale, 0 + corection, 0 + corection, offset); > + } > +} > + > +unsigned int Lookahead::x265_weight_cost_luma( int b, pixel *src, > wpScalingParam *w ) > weightCostLuma() > +{ > + Lowres *fenc = frames[b]; > + unsigned int cost = 0; > + int i_stride = fenc->lumaStride; > + int i_lines = fenc->lines; > + int i_width = fenc->width; > no i_ prefixes > + pixel *fenc_plane = fenc->lowresPlane[0]; > + ALIGN_VAR_16( pixel, buf[8*8]); > + int pixoff = 0; > + int i_mb = 0; > + > + if( w ) > + { > + for( int y = 0; y < i_lines; y += 8, pixoff = y*i_stride ) > + for( int x = 0; x < i_width; x += 8, i_mb++, pixoff += 8) > + { > + // TO DO prepare full weighted plane > + mc_weight(buf, 8, &src[pixoff], i_stride, w, 8, 8); > + int cmp = primitives.satd[LUMA_8x8]( buf, 8, > &fenc_plane[pixoff], i_stride ); > + cost += X265_MIN( cmp, fenc->intraCost[i_mb] ); > + } > + } > + else > + for( int y = 0; y < i_lines; y += 8, pixoff = y*i_stride ) > + for( int x = 0; x < i_width; x += 8, i_mb++, pixoff += 8 ) > + { > + int cmp = primitives.satd[LUMA_8x8]( &src[pixoff], > i_stride, &fenc_plane[pixoff], i_stride ); > + cost += X265_MIN( cmp, fenc->intraCost[i_mb] ); > + } > + x265_emms(); > + return cost; > +} > this function is also not used, and should be removed > +void Lookahead::weightsAnalyse(int b, int p0, int b_lookahead, > wpScalingParam* w) > +{ > remove b_lookahead, assume it is always 1, simplify the code below. 
> + //int i_delta_index = b - p0 - 1; > remove > + Lowres *fenc, *ref; > + fenc = frames[b]; > + ref = frames[p0]; > + /* epsilon is chosen to require at least a numerator of 127 (with > denominator = 128) */ > + const float epsilon = 1.f/128.f; > + wpScalingParam *weights = w; > yuck, just rename function argument > + SET_WEIGHT( weights[0], 0, 1, 0, 0 ); > > + //SET_WEIGHT( weights[1], 0, 1, 0, 0 ); > + //SET_WEIGHT( weights[2], 0, 1, 0, 0 ); > remove these two > + float guess_scale[3]; > + float fenc_mean[3]; > + float ref_mean[3]; > + for( int plane = 0; plane <= 2*!b_lookahead; plane++ ) > + { > + float fenc_var = (float) fenc->wp_ssd[plane] + > !ref->wp_ssd[plane]; > + float ref_var = (float) ref->wp_ssd[plane] + > !ref->wp_ssd[plane]; > + guess_scale[plane] = sqrtf( fenc_var / ref_var ); > + fenc_mean[plane] = (float)fenc->wp_sum[plane] / > ((fenc->lines>>(plane?1:0)) * (fenc->width>>(plane?1:0))) /*/ (1 << > (BIT_DEPTH - 8))*/; > + ref_mean[plane] = (float) ref->wp_sum[plane] / > ((fenc->lines>>(plane?1:0)) * (fenc->width>>(plane?1:0))) /*/ (1 << > (BIT_DEPTH - 8))*/; > + } > + > + //int chroma_denom = 7; > + //if( !b_lookahead ) > + //{ > + // /* make sure both our scale factors fit */ > + // while( chroma_denom > 0 ) > + // { > + // float thresh = 127.f / (1<<chroma_denom); > + // if( guess_scale[1] < thresh && guess_scale[2] < thresh ) > + // break; > + // chroma_denom--; > + // } > + //} > bye > + > + /* Don't check chroma in lookahead, or if there wasn't a luma weight. 
> */ > + for( int plane = 0; plane <= 2 && !( plane && ( > /*!weights[0].weightfn*/ !fenc->isWeighted || b_lookahead ) ); plane++ ) > + { > + int minoff, minscale, mindenom; > + unsigned int minscore = 0, origscore = 1; > + origscore; > + int found; > + > + //early termination > + if( fabsf( ref_mean[plane] - fenc_mean[plane] ) < 0.5f && fabsf( > 1.f - guess_scale[plane] ) < epsilon ) > + { > + SET_WEIGHT( weights[plane], 0, 1, 0, 0 ); > + //printf("\nEarly\n"); > + continue; > + } > + > + if( plane ) > + { > + //weights[plane].i_denom = chroma_denom; > + //weights[plane].i_scale = x264_clip3( round( > guess_scale[plane] * (1<<chroma_denom) ), 0, 255 ); > + //if( weights[plane].i_scale > 127 ) > + //{ > + // weights[1].weightfn = weights[2].weightfn = NULL; > + // break; > + //} > + } > + else > + x265_weight_get_h265( (int)( guess_scale[plane] * 128 + 0.5), > 0, &weights[plane] ); > + > + found = 0; > + mindenom = weights[plane].log2WeightDenom; > + minscale = weights[plane].inputWeight; > + minoff = 0; > + > + pixel *mcbuf = NULL; > + mcbuf; > + if( !plane ) > + { > + if( !fenc->bIntraCalculated ) > + { > + estimateFrameCost(b,b,b,0); > + } > + mcbuf = x265_weight_cost_init_luma( b, p0, NULL ); > + origscore = minscore = x265_weight_cost_luma( b, mcbuf, NULL > ); > + } > + else > + { > + //if( CHROMA444 ) > + //{ > + // mcbuf = x264_weight_cost_init_chroma444( h, fenc, ref, > h->mb.p_weight_buf[0], plane ); > + // origscore = minscore = x264_weight_cost_chroma444( h, > fenc, mcbuf, NULL, plane ); > + //} > + //else > + //{ > + //pixel *dstu = h->mb.p_weight_buf[0]; > + //pixel *dstv = > h->mb.p_weight_buf[0]+fenc->i_stride[1]*fenc->i_lines[1]; > + //if( !chroma_initted++ ) > + // x264_weight_cost_init_chroma( h, fenc, ref, dstu, > dstv ); > + //mcbuf = plane == 1 ? 
dstu : dstv; > + //origscore = minscore = x264_weight_cost_chroma( h, > fenc, mcbuf, NULL ); > + //} > remove > + } > + > + if( !minscore ) > + continue; > + > + /* Picked somewhat arbitrarily */ > + static const uint8_t weight_check_distance[][2] = > + { > + {0,0},{0,0},{0,1},{0,1}, > + {0,1},{0,1},{0,1},{1,1}, > + {1,1},{2,1},{2,1},{4,2} > + }; > + int scale_dist = b_lookahead ? 0 : > weight_check_distance[cfg->param.subpelRefine][0]; > + int offset_dist = b_lookahead ? 0 : > weight_check_distance[cfg->param.subpelRefine][1]; > this all goes away > + > + int start_scale = Clip3( 0, 127, minscale - scale_dist ); > + int end_scale = Clip3( 0, 127, minscale + scale_dist ); > + unsigned int s=0; > + for( int i_scale = start_scale; i_scale <= end_scale; i_scale++ ) > + { > + int cur_scale = i_scale; > + int cur_offset = (int) (fenc_mean[plane] - ref_mean[plane] * > cur_scale / (1 << mindenom) + 0.5f * b_lookahead); > + if( cur_offset < - 128 || cur_offset > 127 ) > + { > + /* Rescale considering the constraints on cur_offset. We > do it in this order > + * because scale has a much wider range than offset > (because of denom), so > + * it should almost never need to be clamped. 
*/ > + cur_offset = Clip3( -128, 127, cur_offset ); > + cur_scale = (int) ((1 << mindenom) * (fenc_mean[plane] - > cur_offset) / ref_mean[plane] + 0.5f); > + cur_scale = Clip3( 0, 127, cur_scale ); > + } > + int start_offset = Clip3( -128, 127, cur_offset - offset_dist > ); > + int end_offset = Clip3( -128, 127, cur_offset + offset_dist > ); > + for( int i_off = start_offset; i_off <= end_offset; i_off++ ) > + { > + SET_WEIGHT( weights[plane], 1, cur_scale, mindenom, i_off > ); > + //unsigned int s; > + //if( plane ) > + //{ > + // //if( CHROMA444 ) > + // // s = x264_weight_cost_chroma444( h, fenc, > mcbuf, &weights[plane], plane ); > + // //else > + // s = x264_weight_cost_chroma( h, fenc, mcbuf, > &weights[plane] ); > + //} > + //else > + { > + s = x265_weight_cost_luma( b, mcbuf, &weights[plane] > ); > + } > + COPY4_IF_LT( minscore, s, minscale, cur_scale, minoff, > i_off, found, 1 ); > + > + // Don't check any more offsets if the previous one had a > lower cost than the current one > + if( minoff == start_offset && i_off != start_offset ) > + break; > + } > + } > + x265_emms(); > + > + /* Use a smaller denominator if possible */ > + if( !plane ) > + { > + while( mindenom > 0 && !(minscale&1) ) > + { > + mindenom--; > + minscale >>= 1; > + } > + } > + > + /* FIXME: More analysis can be done here on SAD vs. SATD > termination. */ > + /* 0.2% termination derived experimentally to avoid weird weights > in frames that are mostly intra. 
*/ > + if( !found || (minscale == 1 << mindenom && minoff == 0) || > (float)minscore / origscore > 0.998f ) > + { > + SET_WEIGHT( weights[plane], 0, 1, 0, 0 ); > + continue; > + } > + else > + { > + SET_WEIGHT( weights[plane], 1, minscale, mindenom, minoff ); > + } > + > + //if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_FAKE && > weights[0].weightfn && !plane ) > + // fenc->f_weighted_cost_delta[i_delta_index] = > (float)minscore / origscore; > + } > + > + ///* Optimize and unify denominator */ > + //if( weights[1].weightfn || weights[2].weightfn ) > + //{ > + // int denom = weights[1].weightfn ? weights[1].i_denom : > weights[2].i_denom; > + // int both_weighted = weights[1].weightfn && weights[2].weightfn; > + // /* If only one plane is weighted, the other has an implicit > scale of 1<<denom. > + // * With denom==7, this comes out to 128, which is invalid, so > don't allow that. */ > + // while( (!both_weighted && denom==7) || > + // (denom > 0 && !(weights[1].weightfn && > (weights[1].i_scale&1)) > + // && !(weights[2].weightfn && > (weights[2].i_scale&1))) ) > + // { > + // denom--; > + // for( int i = 1; i <= 2; i++ ) > + // if( weights[i].weightfn ) > + // { > + // weights[i].i_scale >>= 1; > + // weights[i].i_denom = denom; > + // } > + // } > + //} > + //for( int i = 1; i <= 2; i++ ) > + // if( weights[i].weightfn ) > + // h->mc.weight_cache( h, &weights[i] ); > + > + //if( weights[0].weightfn && b_lookahead ) > + //{ > + // //scale lowres in lookahead for slicetype_frame_cost > + // pixel *src = ref->buffer_lowres[0]; > + // pixel *dst = h->mb.p_weight_buf[0]; > + // int width = ref->i_width_lowres + PADH*2; > + // int height = ref->i_lines_lowres + PADV*2; > + // x264_weight_scale_plane( h, dst, ref->i_stride_lowres, src, > ref->i_stride_lowres, > + // width, height, &weights[0] ); > + // fenc->weighted[0] = h->mb.p_weight_buf[0] + PADH + > ref->i_stride_lowres * PADV; > + //} > remove; we should never have commented code blocks > +} > + > #define 
NUM_CUS (widthInCU > 2 && heightInCU > 2 ? (widthInCU - 2) * > (heightInCU - 2) : widthInCU * heightInCU) > > int Lookahead::estimateFrameCost(int p0, int p1, int b, bool > bIntraPenalty) > { > int score = 0; > Lowres *fenc = frames[b]; > - > - curb = b; > - curp0 = p0; > - curp1 = p1; > + wpScalingParam wp; > + wp.bPresentFlag = false; > > if (fenc->costEst[b - p0][p1 - b] >= 0 && fenc->rowSatds[b - p0][p1 - > b][0] != -1) > score = fenc->costEst[b - p0][p1 - b]; > @@ -209,9 +530,23 @@ > bDoSearch[0] = b != p0 && fenc->lowresMvs[0][b - p0 - 1][0].x == > 0x7FFF; > bDoSearch[1] = b != p1 && fenc->lowresMvs[1][p1 - b - 1][0].x == > 0x7FFF; > > - if (bDoSearch[0]) fenc->lowresMvs[0][b - p0 - 1][0].x = 0; > + if (bDoSearch[0]) > + { > + if( cfg->param.bEnableWeightedPred && b==p1) > white-space, I'll quit commenting on this > + { > + wp.bPresentFlag = false; > + wp.inputWeight = 0; > this re-initialization is unnecessary > + weightsAnalyse(b, p0, 1, &wp); > + } > + bDoSearch[0] = b != p0 && fenc->lowresMvs[0][b - p0 - 1][0].x == > 0x7FFF; > + bDoSearch[1] = b != p1 && fenc->lowresMvs[1][p1 - b - 1][0].x == > 0x7FFF; > these two lines are out-of-place, probably need to be deleted > + fenc->lowresMvs[0][b - p0 - 1][0].x = 0; > + } > if (bDoSearch[1]) fenc->lowresMvs[1][p1 - b - 1][0].x = 0; > > + curb = b; > + curp0 = p0; > + curp1 = p1; > these look unused > fenc->costEst[b - p0][p1 - b] = 0; > fenc->costEstAq[b - p0][p1 - b] = 0; > // TODO: use lowres MVs as motion candidates in full-res search > @@ -572,6 +907,15 @@ > brefs++; > } */ > > + ///* Analyse for weighted P frames */ > + //if (/*!h->param.rc.b_stat_read &&*/ frames[bframes]->sliceType > == X265_TYPE_P && cfg->param.bEnableWeightedPred > + // /*&& h->param.analyse.i_weighted_pred >= > X264_WEIGHTP_SIMPLE*/) > + //{ > + // x265_emms(); > + // //x264_weights_analyse(h, h->lookahead->next.list[bframes], > h->lookahead->last_nonb, 0); > + // weightsAnalyse(bframes, 0, 1); > + //} > + > this will not be called 
from here, please remove > /* calculate the frame costs ahead of time for > x264_rc_analyse_slice while we still have lowres */ > if (cfg->param.rc.rateControlMode != X265_RC_CQP) > { > @@ -613,14 +957,6 @@ > } */ > } > > - /* Analyse for weighted P frames > - if (!h->param.rc.b_stat_read && > h->lookahead->next.list[bframes]->i_type == X264_TYPE_P > - && h->param.analyse.i_weighted_pred >= X264_WEIGHTP_SIMPLE) > - { > - x265_emms(); > - x264_weights_analyse(h, h->lookahead->next.list[bframes], > h->lookahead->last_nonb, 0); > - }*/ > - > /* dequeue all frames from inputQueue that are about to be > enqueued > * in the output queue. The order is important because TComPic > can > * only be in one list at a time */ > diff -r e7319fd46128 -r 21596a519ba8 source/encoder/slicetype.h > --- a/source/encoder/slicetype.h Tue Nov 12 17:06:03 2013 +0530 > +++ b/source/encoder/slicetype.h Tue Nov 12 17:07:27 2013 +0530 > @@ -47,11 +47,13 @@ > int costIntra; // Estimated Intra cost for all > CUs in a row > int costIntraAq; // Estimated weighted Aq Intra > cost for all CUs in a row > int intraMbs; // Number of Intra CUs > + TEncCfg *cfg; > > Lowres** frames; > int widthInCU; > int heightInCU; > int merange; > + Lowres *weightedRef; > > LookaheadRow() > { > @@ -82,6 +84,9 @@ > int widthInCU; // width of lowres frame in > downscale CUs > int heightInCU; // height of lowres frame in > downscale CUs > > + Lowres *weightedRef; > + int numWRefs; > + > PicList inputQueue; // input pictures in order received > PicList outputQueue; // pictures to be encoded, in encode order > > @@ -110,6 +115,11 @@ > int slicetypePathCost(char *path, int threshold); > > void processRow(int row); > + > + void weightsAnalyse(int b, int p0, int b_lookahead, wpScalingParam > *w); > + unsigned int x265_weight_cost_luma( int b, pixel *src, wpScalingParam > *w ); > + pixel* x265_weight_cost_init_luma( int b, int p0, pixel *dest ); > + int x265_weight_slice_header_cost(wpScalingParam *w, int b_chroma ); > }; > } 
> > _______________________________________________ > x265-devel mailing list > [email protected] > https://mailman.videolan.org/listinfo/x265-devel > -- Steve Borho
_______________________________________________
x265-devel mailing list
[email protected]
https://mailman.videolan.org/listinfo/x265-devel
