Pushed to default. On Mon, Nov 25, 2019 at 7:01 PM Praveen Kumar Karadugattu < [email protected]> wrote:
> This patch has been reviewed and looks good to me. > > Regards, > Praveen > > On Mon, Nov 25, 2019 at 6:53 PM Srikanth Kurapati < > [email protected]> wrote: > >> # HG changeset patch >> # User Srikanth Kurapati <[email protected]> >> # Date 1573649311 -19800 >> # Wed Nov 13 18:18:31 2019 +0530 >> # Node ID 97a9eca413d83cd03ae0fa95957160bdf70c170b >> # Parent 04db2bfee5d628d931d1407355b909ac8ff1c898 >> Histogram Based Scene Cut Detection. >> >> This patch does the following. >> 1.Finds scene cuts by thresholding normalized SAD of edge and chroma >> histograms. >> 2.Add option "--hist-scenecut" to enable histogram based scene cut >> detection. >> 3.Add option "--hist-threshold" to provide threshold for determining >> scene cuts. >> 4.Optimizes frame duplication by reusing normalized SAD to mark duplicate >> frames. >> >> diff -r 04db2bfee5d6 -r 97a9eca413d8 doc/reST/cli.rst >> --- a/doc/reST/cli.rst Thu Oct 31 16:23:27 2019 +0530 >> +++ b/doc/reST/cli.rst Wed Nov 13 18:18:31 2019 +0530 >> @@ -1426,7 +1426,20 @@ >> This value represents the percentage difference between the inter cost >> and >> intra cost of a frame used in scenecut detection. For example, a value >> of 5 indicates, >> if the inter cost of a frame is greater than or equal to 95 percent of >> the intra cost of the frame, >> - then detect this frame as scenecut. Values between 5 and 15 are >> recommended. Default 5. >> + then detect this frame as scenecut. Values between 5 and 15 are >> recommended. Default 5. >> + >> +.. option:: --hist-scenecut, --no-hist-scenecut >> + >> + Indicates that scenecuts need to be detected using luma edge and chroma >> histograms. >> + option: `--hist-scenecut` enables scenecut detection using the >> histograms and disables the default scene cut algorithm. >> + option: `--no-hist-scenecut` disables histogram based scenecut >> algorithm. >> + >> +.. 
option:: --hist-threshold <0.0..2.0> >> + >> + This value represents the threshold for normalized SAD of edge >> histograms used in scenecut detection. >> + This requires option: `--hist-scenecut` to be enabled. For example, a >> value of 0.2 indicates that a frame with normalized SAD value >> + greater than 0.2 against the previous frame is detected as a scenecut. >> + Default 0.01. >> >> .. option:: --radl <integer> >> >> diff -r 04db2bfee5d6 -r 97a9eca413d8 source/CMakeLists.txt >> --- a/source/CMakeLists.txt Thu Oct 31 16:23:27 2019 +0530 >> +++ b/source/CMakeLists.txt Wed Nov 13 18:18:31 2019 +0530 >> @@ -29,7 +29,7 @@ >> option(STATIC_LINK_CRT "Statically link C runtime for release builds" >> OFF) >> mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD) >> # X265_BUILD must be incremented each time the public API is changed >> -set(X265_BUILD 182) >> +set(X265_BUILD 183) >> configure_file("${PROJECT_SOURCE_DIR}/x265.def.in" >> "${PROJECT_BINARY_DIR}/x265.def") >> configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in" >> diff -r 04db2bfee5d6 -r 97a9eca413d8 source/common/common.h >> --- a/source/common/common.h Thu Oct 31 16:23:27 2019 +0530 >> +++ b/source/common/common.h Wed Nov 13 18:18:31 2019 +0530 >> @@ -129,12 +129,16 @@ >> typedef uint64_t sum2_t; >> typedef uint64_t pixel4; >> typedef int64_t ssum2_t; >> +#define HISTOGRAM_BINS 1024 >> +#define SHIFT 1 >> #else >> typedef uint8_t pixel; >> typedef uint16_t sum_t; >> typedef uint32_t sum2_t; >> typedef uint32_t pixel4; >> typedef int32_t ssum2_t; // Signed sum >> +#define HISTOGRAM_BINS 256 >> +#define SHIFT 0 >> #endif // if HIGH_BIT_DEPTH >> >> #if X265_DEPTH < 10 >> diff -r 04db2bfee5d6 -r 97a9eca413d8 source/common/param.cpp >> --- a/source/common/param.cpp Thu Oct 31 16:23:27 2019 +0530 >> +++ b/source/common/param.cpp Wed Nov 13 18:18:31 2019 +0530 >> @@ -167,6 +167,8 @@ >> param->bFrameAdaptive = X265_B_ADAPT_TRELLIS; >> param->bBPyramid = 1; >> param->scenecutThreshold = 40; /* Magic number pulled 
in from x264 */ >> + param->edgeTransitionThreshold = 0.01; >> + param->bHistBasedSceneCut = 0; >> param->lookaheadSlices = 8; >> param->lookaheadThreads = 0; >> param->scenecutBias = 5.0; >> @@ -572,6 +574,7 @@ >> param->bframes = 0; >> param->lookaheadDepth = 0; >> param->scenecutThreshold = 0; >> + param->bHistBasedSceneCut = 0; >> param->rc.cuTree = 0; >> param->frameNumThreads = 1; >> } >> @@ -920,12 +923,13 @@ >> OPT("lookahead-slices") p->lookaheadSlices = atoi(value); >> OPT("scenecut") >> { >> - p->scenecutThreshold = atobool(value); >> - if (bError || p->scenecutThreshold) >> - { >> - bError = false; >> - p->scenecutThreshold = atoi(value); >> - } >> + p->scenecutThreshold = atobool(value); >> + if (bError || p->scenecutThreshold) >> + { >> + bError = false; >> + p->scenecutThreshold = atoi(value); >> + p->bHistBasedSceneCut = 0; >> + } >> } >> OPT("temporal-layers") p->bEnableTemporalSubLayers = atobool(value); >> OPT("keyint") p->keyframeMax = atoi(value); >> @@ -1191,6 +1195,21 @@ >> OPT("opt-ref-list-length-pps") p->bOptRefListLengthPPS = >> atobool(value); >> OPT("multi-pass-opt-rps") p->bMultiPassOptRPS = atobool(value); >> OPT("scenecut-bias") p->scenecutBias = atof(value); >> + OPT("hist-scenecut") >> + { >> + p->bHistBasedSceneCut = atobool(value); >> + if (bError) >> + { >> + bError = false; >> + p->bHistBasedSceneCut = 0; >> + } >> + if (p->bHistBasedSceneCut) >> + { >> + bError = false; >> + p->scenecutThreshold = 0; >> + } >> + } >> + OPT("hist-threshold") p->edgeTransitionThreshold = atof(value); >> OPT("lookahead-threads") p->lookaheadThreads = atoi(value); >> OPT("opt-cu-delta-qp") p->bOptCUDeltaQP = atobool(value); >> OPT("multi-pass-opt-analysis") p->analysisMultiPassRefine = >> atobool(value); >> @@ -1632,7 +1651,9 @@ >> CHECK(param->scenecutThreshold < 0, >> "scenecutThreshold must be greater than 0"); >> CHECK(param->scenecutBias < 0 || 100 < param->scenecutBias, >> - "scenecut-bias must be between 0 and 100"); >> + "scenecut-bias 
must be between 0 and 100"); >> + CHECK(param->edgeTransitionThreshold < 0.0 || 2.0 < >> param->edgeTransitionThreshold, >> + "hist-threshold must be between 0.0 and 2.0"); >> CHECK(param->radl < 0 || param->radl > param->bframes, >> "radl must be between 0 and bframes"); >> CHECK(param->rdPenalty < 0 || param->rdPenalty > 2, >> @@ -1792,9 +1813,13 @@ >> x265_log(param, X265_LOG_INFO, "ME / range / subpel / merge >> : %s / %d / %d / %d\n", >> x265_motion_est_names[param->searchMethod], >> param->searchRange, param->subpelRefine, param->maxNumMergeCand); >> >> - if (param->keyframeMax != INT_MAX || param->scenecutThreshold) >> - x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut / >> bias: %d / %d / %d / %.2lf\n", param->keyframeMin, param->keyframeMax, >> param->scenecutThreshold, param->scenecutBias * 100); >> - else >> + if (param->scenecutThreshold && param->keyframeMax != INT_MAX) >> + x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut / >> bias : %d / %d / %d / %.2lf \n", >> + param->keyframeMin, param->keyframeMax, >> param->scenecutThreshold, param->scenecutBias * 100); >> + else if (param->bHistBasedSceneCut && param->keyframeMax != INT_MAX) >> + x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut / >> edge threshold : %d / %d / %d / %.2lf\n", >> + param->keyframeMin, param->keyframeMax, >> param->bHistBasedSceneCut, param->edgeTransitionThreshold); >> + else if (param->keyframeMax == INT_MAX) >> x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut >> : disabled\n"); >> >> if (param->cbQpOffset || param->crQpOffset) >> @@ -1961,6 +1986,7 @@ >> s += sprintf(s, " rc-lookahead=%d", p->lookaheadDepth); >> s += sprintf(s, " lookahead-slices=%d", p->lookaheadSlices); >> s += sprintf(s, " scenecut=%d", p->scenecutThreshold); >> + s += sprintf(s, " hist-scenecut=%d", p->bHistBasedSceneCut); >> s += sprintf(s, " radl=%d", p->radl); >> BOOL(p->bEnableHRDConcatFlag, "splice"); >> BOOL(p->bIntraRefresh, "intra-refresh"); >> @@ 
-2108,6 +2134,7 @@ >> BOOL(p->bOptRefListLengthPPS, "opt-ref-list-length-pps"); >> BOOL(p->bMultiPassOptRPS, "multi-pass-opt-rps"); >> s += sprintf(s, " scenecut-bias=%.2f", p->scenecutBias); >> + s += sprintf(s, " hist-threshold=%.2f", p->edgeTransitionThreshold); >> BOOL(p->bOptCUDeltaQP, "opt-cu-delta-qp"); >> BOOL(p->bAQMotion, "aq-motion"); >> BOOL(p->bEmitHDRSEI, "hdr"); >> @@ -2261,6 +2288,7 @@ >> dst->lookaheadSlices = src->lookaheadSlices; >> dst->lookaheadThreads = src->lookaheadThreads; >> dst->scenecutThreshold = src->scenecutThreshold; >> + dst->bHistBasedSceneCut = src->bHistBasedSceneCut; >> dst->bIntraRefresh = src->bIntraRefresh; >> dst->maxCUSize = src->maxCUSize; >> dst->minCUSize = src->minCUSize; >> @@ -2420,6 +2448,7 @@ >> dst->bOptRefListLengthPPS = src->bOptRefListLengthPPS; >> dst->bMultiPassOptRPS = src->bMultiPassOptRPS; >> dst->scenecutBias = src->scenecutBias; >> + dst->edgeTransitionThreshold = src->edgeTransitionThreshold; >> dst->gopLookahead = src->lookaheadDepth; >> dst->bOptCUDeltaQP = src->bOptCUDeltaQP; >> dst->analysisMultiPassDistortion = src->analysisMultiPassDistortion; >> diff -r 04db2bfee5d6 -r 97a9eca413d8 source/encoder/encoder.cpp >> --- a/source/encoder/encoder.cpp Thu Oct 31 16:23:27 2019 +0530 >> +++ b/source/encoder/encoder.cpp Wed Nov 13 18:18:31 2019 +0530 >> @@ -130,12 +130,17 @@ >> #if SVT_HEVC >> m_svtAppData = NULL; >> #endif >> - >> m_prevTonemapPayload.payload = NULL; >> m_startPoint = 0; >> m_saveCTUSize = 0; >> + m_edgePic = NULL; >> + m_edgeHistThreshold = 0; >> + m_chromaHistThreshold = 0.0; >> + m_scaledEdgeThreshold = 0.0; >> + m_scaledChromaThreshold = 0.0; >> m_zoneIndex = 0; >> } >> + >> inline char *strcatFilename(const char *input, const char *suffix) >> { >> char *output = X265_MALLOC(char, strlen(input) + strlen(suffix) + 1); >> @@ -210,6 +215,23 @@ >> } >> } >> >> + if (m_param->bHistBasedSceneCut) >> + { >> + for (int i = 0; i < x265_cli_csps[m_param->internalCsp].planes; >> i++) >> + { >> + 
m_planeSizes[i] = m_param->sourceWidth * >> m_param->sourceHeight >> x265_cli_csps[m_param->internalCsp].height[i]; >> + } >> + uint32_t pixelbytes = m_param->sourceBitDepth > 8 ? 2 : 1; >> + m_edgePic = X265_MALLOC(pixel, m_planeSizes[0] * pixelbytes); >> + m_edgeHistThreshold = m_param->edgeTransitionThreshold; >> + m_chromaHistThreshold = m_edgeHistThreshold * 10.0; >> + m_chromaHistThreshold = x265_min(m_chromaHistThreshold, >> MAX_SCENECUT_THRESHOLD); >> + m_scaledEdgeThreshold = m_edgeHistThreshold * >> SCENECUT_STRENGTH_FACTOR; >> + m_scaledEdgeThreshold = x265_min(m_scaledEdgeThreshold, >> MAX_SCENECUT_THRESHOLD); >> + m_scaledChromaThreshold = m_chromaHistThreshold * >> SCENECUT_STRENGTH_FACTOR; >> + m_scaledChromaThreshold = x265_min(m_scaledChromaThreshold, >> MAX_SCENECUT_THRESHOLD); >> + } >> + >> // Do not allow WPP if only one row or fewer than 3 columns, it is >> pointless and unstable >> if (rows == 1 || cols < 3) >> { >> @@ -854,6 +876,12 @@ >> } >> } >> >> + if (m_param->bHistBasedSceneCut) >> + { >> + if(m_edgePic != NULL) >> + X265_FREE_ZERO(m_edgePic); >> + } >> + >> for (int i = 0; i < m_param->frameNumThreads; i++) >> { >> if (m_frameEncoder[i]) >> @@ -1313,6 +1341,142 @@ >> dest->planes[2] = (char*)dest->planes[1] + src->stride[1] * >> (src->height >> x265_cli_csps[src->colorSpace].height[1]); >> } >> >> +bool Encoder::computeHistograms(x265_picture *pic) >> +{ >> + pixel *src = (pixel *) pic->planes[0]; >> + size_t bufSize = sizeof(pixel) * m_planeSizes[0]; >> + int32_t planeCount = x265_cli_csps[m_param->internalCsp].planes; >> + int32_t numBytes = m_param->sourceBitDepth > 8 ? 
2 : 1; >> + memset(m_edgePic, 0, bufSize * numBytes); >> + >> + if (!computeEdge(m_edgePic, src, NULL, pic->width, pic->height, >> pic->width, false)) >> + { >> + x265_log(m_param, X265_LOG_ERROR, "Failed edge computation!"); >> + return false; >> + } >> + >> + pixel pixelVal; >> + int64_t size = pic->height * (pic->stride[0] >> SHIFT); >> + int32_t *edgeHist = m_curEdgeHist; >> + memset(edgeHist, 0, 2 * sizeof(int32_t)); >> + for (int64_t i = 0; i < size; i++) >> + { >> + if (!m_edgePic[i]) >> + edgeHist[0]++; >> + else >> + edgeHist[1]++; >> + } >> + >> + if (pic->colorSpace != X265_CSP_I400) >> + { >> + /* U Histogram Calculation */ >> + int32_t HeightL = (pic->height >> >> x265_cli_csps[pic->colorSpace].height[1]); >> + size = HeightL * (pic->stride[1] >> SHIFT); >> + int32_t *uHist = m_curUVHist[0]; >> + pixel *chromaPlane = (pixel *) pic->planes[1]; >> + >> + memset(uHist, 0, HISTOGRAM_BINS * sizeof(int32_t)); >> + >> + for (int64_t i = 0; i < size; i++) >> + { >> + pixelVal = chromaPlane[i]; >> + uHist[pixelVal]++; >> + } >> + >> + /* V Histogram Calculation */ >> + if (planeCount == 3) >> + { >> + pixelVal = 0; >> + int32_t heightV = (pic->height >> >> x265_cli_csps[pic->colorSpace].height[2]); >> + size = heightV * (pic->stride[2] >> SHIFT); >> + int32_t *vHist = m_curUVHist[1]; >> + chromaPlane = (pixel *) pic->planes[2]; >> + >> + memset(vHist, 0, HISTOGRAM_BINS * sizeof(int32_t)); >> + for (int64_t i = 0; i < size; i++) >> + { >> + pixelVal = chromaPlane[i]; >> + vHist[pixelVal]++; >> + } >> + for (int i = 0; i < HISTOGRAM_BINS; i++) >> + { >> + m_curMaxUVHist[i] = x265_max(uHist[i], vHist[i]); >> + } >> + } >> + else >> + { /* in case of bi planar color space */ >> + memcpy(m_curMaxUVHist, m_curUVHist[0], HISTOGRAM_BINS * >> sizeof(int32_t)); >> + } >> + } >> + return true; >> +} >> + >> +void Encoder::computeHistogramSAD(double *maxUVNormalizedSad, double >> *edgeNormalizedSad, int curPoc) >> +{ >> + >> + if (curPoc == 0) >> + { /* first frame is 
scenecut by default no sad computation for the >> same. */ >> + *maxUVNormalizedSad = 0.0; >> + *edgeNormalizedSad = 0.0; >> + } >> + else >> + { >> + /* compute sum of absolute difference of normalized histogram >> bins for maxUV and edge histograms. */ >> + int32_t edgefreqDiff = 0; >> + int32_t maxUVfreqDiff = 0; >> + double edgeProbabilityDiff = 0; >> + >> + for (int j = 0; j < HISTOGRAM_BINS; j++) >> + { >> + if (j < 2) >> + { >> + edgefreqDiff = abs(m_curEdgeHist[j] - m_prevEdgeHist[j]); >> + edgeProbabilityDiff = (double) edgefreqDiff / >> m_planeSizes[0]; >> + *edgeNormalizedSad += edgeProbabilityDiff; >> + } >> + maxUVfreqDiff = abs(m_curMaxUVHist[j] - m_prevMaxUVHist[j]); >> + *maxUVNormalizedSad += (double)maxUVfreqDiff / >> m_planeSizes[2]; >> + } >> + } >> + >> + /* store histograms of previous frame for reference */ >> + size_t bufsize = HISTOGRAM_BINS * sizeof(int32_t); >> + memcpy(m_prevMaxUVHist, m_curMaxUVHist, bufsize); >> + memcpy(m_prevEdgeHist, m_curEdgeHist, 2 * sizeof(int32_t)); >> +} >> + >> +void Encoder::findSceneCuts(x265_picture *pic, bool& bDup, double >> maxUVSad, double edgeSad) >> +{ >> + pic->frameData.bScenecut = false; >> + >> + if (pic->poc == 0) >> + { >> + /* for first frame */ >> + pic->frameData.bScenecut = false; >> + bDup = false; >> + } >> + else >> + { >> + if (edgeSad == 0.0 && maxUVSad == 0.0) >> + { >> + bDup = true; >> + } >> + else if (edgeSad > m_edgeHistThreshold && maxUVSad >= >> m_chromaHistThreshold) >> + { >> + pic->frameData.bScenecut = true; >> + bDup = false; >> + } >> + else if (edgeSad > m_scaledEdgeThreshold || maxUVSad >= >> m_scaledChromaThreshold) >> + { >> + pic->frameData.bScenecut = true; >> + bDup = false; >> + } >> + } >> + >> + if (pic->frameData.bScenecut) >> + x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d \n", pic->poc); >> +} >> + >> /** >> * Feed one new input frame into the encoder, get one frame out. 
If >> pic_in is >> * NULL, a flush condition is implied and pic_in must be NULL for all >> subsequent >> @@ -1339,6 +1503,8 @@ >> const x265_picture* inputPic = NULL; >> static int written = 0, read = 0; >> bool dontRead = false; >> + bool bdropFrame = false; >> + bool dropflag = false; >> >> if (m_exportedPic) >> { >> @@ -1350,6 +1516,17 @@ >> } >> if ((pic_in && (!m_param->chunkEnd || (m_encodedFrameNum < >> m_param->chunkEnd))) || (m_param->bEnableFrameDuplication && !pic_in && >> (read < written))) >> { >> + if (m_param->bHistBasedSceneCut && pic_in) >> + { >> + x265_picture *pic = (x265_picture *) pic_in; >> + if (computeHistograms(pic)) >> + { >> + double maxUVSad = 0.0, edgeSad = 0.0; >> + computeHistogramSAD(&maxUVSad, &edgeSad, pic_in->poc); >> + findSceneCuts(pic, bdropFrame, maxUVSad, edgeSad); >> + } >> + } >> + >> if ((m_param->bEnableFrameDuplication && !pic_in && (read < >> written))) >> dontRead = true; >> else >> @@ -1368,7 +1545,7 @@ >> if (pic_in->bitDepth < 8 || pic_in->bitDepth > 16) >> { >> x265_log(m_param, X265_LOG_ERROR, "Input bit depth (%d) >> must be between 8 and 16\n", >> - pic_in->bitDepth); >> + pic_in->bitDepth); >> return -1; >> } >> } >> @@ -1393,9 +1570,27 @@ >> written++; >> } >> >> - psnrWeight = ComputePSNR(m_dupBuffer[0]->dupPic, >> m_dupBuffer[1]->dupPic, m_param); >> - >> - if (psnrWeight >= m_param->dupThreshold) >> + if (m_param->bEnableFrameDuplication && >> m_param->bHistBasedSceneCut) >> + { >> + if (!bdropFrame && >> m_dupBuffer[1]->dupPic->frameData.bScenecut == false) >> + { >> + psnrWeight = ComputePSNR(m_dupBuffer[0]->dupPic, >> m_dupBuffer[1]->dupPic, m_param); >> + if (psnrWeight >= m_param->dupThreshold) >> + dropflag = true; >> + } >> + else >> + { >> + dropflag = true; >> + } >> + } >> + else if (m_param->bEnableFrameDuplication) >> + { >> + psnrWeight = ComputePSNR(m_dupBuffer[0]->dupPic, >> m_dupBuffer[1]->dupPic, m_param); >> + if (psnrWeight >= m_param->dupThreshold) >> + dropflag = true; >> + } >> + >> + 
if (dropflag) >> { >> if (m_dupBuffer[0]->bDup) >> { >> @@ -1428,7 +1623,7 @@ >> inputPic = pic_in; >> >> Frame *inFrame; >> - x265_param* p = (m_reconfigure || m_reconfigureRc) ? >> m_latestParam : m_param; >> + x265_param *p = (m_reconfigure || m_reconfigureRc) ? >> m_latestParam : m_param; >> if (m_dpb->m_freeList.empty()) >> { >> inFrame = new Frame; >> @@ -1498,6 +1693,10 @@ >> inFrame->m_poc = ++m_pocLast; >> inFrame->m_userData = inputPic->userData; >> inFrame->m_pts = inputPic->pts; >> + if (m_param->bHistBasedSceneCut) >> + { >> + inFrame->m_lowres.bScenecut = (inputPic->frameData.bScenecut >> == 1) ? true : false; >> + } >> inFrame->m_forceqp = inputPic->forceqp; >> inFrame->m_param = (m_reconfigure || m_reconfigureRc) ? >> m_latestParam : m_param; >> inFrame->m_picStruct = inputPic->picStruct; >> @@ -3209,6 +3408,7 @@ >> * adaptive I frame placement */ >> p->keyframeMax = INT_MAX; >> p->scenecutThreshold = 0; >> + p->bHistBasedSceneCut = 0; >> } >> else if (p->keyframeMax <= 1) >> { >> @@ -3222,6 +3422,7 @@ >> p->lookaheadDepth = 0; >> p->bframes = 0; >> p->scenecutThreshold = 0; >> + p->bHistBasedSceneCut = 0; >> p->bFrameAdaptive = 0; >> p->rc.cuTree = 0; >> p->bEnableWeightedPred = 0; >> @@ -3881,6 +4082,13 @@ >> m_param->searchMethod = m_param->hmeSearchMethod[2]; >> } >> } >> + >> + if (p->bHistBasedSceneCut && !p->edgeTransitionThreshold) >> + { >> + p->edgeTransitionThreshold = 0.01; >> + x265_log(p, X265_LOG_WARNING, "using default threshold %.2lf for >> scene cut detection\n", p->edgeTransitionThreshold); >> + } >> + >> } >> >> void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, >> const x265_picture* picIn, int paramBytes) >> diff -r 04db2bfee5d6 -r 97a9eca413d8 source/encoder/encoder.h >> --- a/source/encoder/encoder.h Thu Oct 31 16:23:27 2019 +0530 >> +++ b/source/encoder/encoder.h Wed Nov 13 18:18:31 2019 +0530 >> @@ -156,7 +156,6 @@ >> bool bDup; >> }; >> >> - >> class FrameEncoder; >> class DPB; >> class Lookahead; >> 
@@ -164,6 +163,9 @@ >> class ThreadPool; >> class FrameData; >> >> +#define MAX_SCENECUT_THRESHOLD 2.0 >> +#define SCENECUT_STRENGTH_FACTOR 2.0 >> + >> class Encoder : public x265_encoder >> { >> public: >> @@ -228,7 +230,7 @@ >> bool m_reconfigureRc; >> bool m_reconfigureZone; >> >> - int m_saveCtuDistortionLevel; >> + int m_saveCtuDistortionLevel; >> >> /* Begin intra refresh when one not in progress or else begin one as >> soon as the current >> * one is done. Requires bIntraRefresh to be set.*/ >> @@ -245,11 +247,24 @@ >> Lock m_rpsInSpsLock; >> int m_rpsInSpsCount; >> /* For HDR*/ >> - double m_cB; >> - double m_cR; >> + double m_cB; >> + double m_cR; >> + >> + int m_bToneMap; // Enables tone-mapping >> + int m_enableNal; >> >> - int m_bToneMap; // Enables tone-mapping >> - int m_enableNal; >> + /* For histogram based scene-cut detection */ >> + pixel* m_edgePic; >> + int32_t m_curUVHist[2][HISTOGRAM_BINS]; >> + int32_t m_curMaxUVHist[HISTOGRAM_BINS]; >> + int32_t m_prevMaxUVHist[HISTOGRAM_BINS]; >> + int32_t m_curEdgeHist[2]; >> + int32_t m_prevEdgeHist[2]; >> + uint32_t m_planeSizes[3]; >> + double m_edgeHistThreshold; >> + double m_chromaHistThreshold; >> + double m_scaledEdgeThreshold; >> + double m_scaledChromaThreshold; >> >> #ifdef ENABLE_HDR10_PLUS >> const hdr10plus_api *m_hdr10plus_api; >> @@ -355,6 +370,10 @@ >> >> void copyPicture(x265_picture *dest, const x265_picture *src); >> >> + bool computeHistograms(x265_picture *pic); >> + void computeHistogramSAD(double *maxUVNormalizedSAD, double >> *edgeNormalizedSAD, int curPoc); >> + void findSceneCuts(x265_picture *pic, bool& bDup, double >> m_maxUVSADVal, double m_edgeSADVal); >> + >> void initRefIdx(); >> void analyseRefIdx(int *numRefIdx); >> void updateRefIdx(); >> diff -r 04db2bfee5d6 -r 97a9eca413d8 source/encoder/ratecontrol.cpp >> --- a/source/encoder/ratecontrol.cpp Thu Oct 31 16:23:27 2019 +0530 >> +++ b/source/encoder/ratecontrol.cpp Wed Nov 13 18:18:31 2019 +0530 >> @@ -1200,6 +1200,7 @@ 
>> m_param->rc.bStatRead = 0; >> m_param->bFrameAdaptive = 0; >> m_param->scenecutThreshold = 0; >> + m_param->bHistBasedSceneCut = 0; >> m_param->rc.cuTree = 0; >> if (m_param->bframes > 1) >> m_param->bframes = 1; >> @@ -2284,7 +2285,7 @@ >> if (m_isVbv && m_currentSatd > 0 && curFrame) >> { >> if (m_param->lookaheadDepth || m_param->rc.cuTree || >> - m_param->scenecutThreshold || >> + (m_param->scenecutThreshold || m_param->bHistBasedSceneCut) >> || >> (m_param->bFrameAdaptive && m_param->bframes)) >> { >> /* Lookahead VBV: If lookahead is done, raise the quantizer >> as necessary >> diff -r 04db2bfee5d6 -r 97a9eca413d8 source/encoder/slicetype.cpp >> --- a/source/encoder/slicetype.cpp Thu Oct 31 16:23:27 2019 +0530 >> +++ b/source/encoder/slicetype.cpp Wed Nov 13 18:18:31 2019 +0530 >> @@ -85,6 +85,69 @@ >> >> } // end anonymous namespace >> >> +namespace X265_NS { >> + >> +bool computeEdge(pixel *edgePic, pixel *refPic, pixel *edgeTheta, >> intptr_t stride, int height, int width, bool bcalcTheta) >> +{ >> + intptr_t rowOne = 0, rowTwo = 0, rowThree = 0, colOne = 0, colTwo = >> 0, colThree = 0; >> + intptr_t middle = 0, topLeft = 0, topRight = 0, bottomLeft = 0, >> bottomRight = 0; >> + >> + const int startIndex = 1; >> + >> + if (!edgePic || !refPic || (!edgeTheta && bcalcTheta)) >> + { >> + return false; >> + } >> + else >> + { >> + float gradientH = 0, gradientV = 0, radians = 0, theta = 0; >> + float gradientMagnitude = 0; >> + pixel blackPixel = 0; >> + >> + //Applying Sobel filter expect for border pixels >> + height = height - startIndex; >> + width = width - startIndex; >> + for (int rowNum = startIndex; rowNum < height; rowNum++) >> + { >> + rowTwo = rowNum * stride; >> + rowOne = rowTwo - stride; >> + rowThree = rowTwo + stride; >> + >> + for (int colNum = startIndex; colNum < width; colNum++) >> + { >> + >> + /* Horizontal and vertical gradients >> + [ -3 0 3 ] [-3 -10 -3 ] >> + gH =[ -10 0 10] gV = [ 0 0 0 ] >> + [ -3 0 3 ] [ 3 10 3 ] */ >> + >> + 
colTwo = colNum; >> + colOne = colTwo - startIndex; >> + colThree = colTwo + startIndex; >> + middle = rowTwo + colTwo; >> + topLeft = rowOne + colOne; >> + topRight = rowOne + colThree; >> + bottomLeft = rowThree + colOne; >> + bottomRight = rowThree + colThree; >> + gradientH = (float)(-3 * refPic[topLeft] + 3 * >> refPic[topRight] - 10 * refPic[rowTwo + colOne] + 10 * refPic[rowTwo + >> colThree] - 3 * refPic[bottomLeft] + 3 * refPic[bottomRight]); >> + gradientV = (float)(-3 * refPic[topLeft] - 10 * >> refPic[rowOne + colTwo] - 3 * refPic[topRight] + 3 * refPic[bottomLeft] + >> 10 * refPic[rowThree + colTwo] + 3 * refPic[bottomRight]); >> + gradientMagnitude = sqrtf(gradientH * gradientH + >> gradientV * gradientV); >> + if(bcalcTheta) >> + { >> + edgeTheta[middle] = 0; >> + radians = atan2(gradientV, gradientH); >> + theta = (float)((radians * 180) / PI); >> + if (theta < 0) >> + theta = 180 + theta; >> + edgeTheta[middle] = (pixel)theta; >> + } >> + edgePic[middle] = (pixel)(gradientMagnitude >= >> edgeThreshold ? 
edgeThreshold : blackPixel); >> + } >> + } >> + return true; >> + } >> +} >> + >> void edgeFilter(Frame *curFrame, x265_param* param) >> { >> int height = curFrame->m_fencPic->m_picHeight; >> @@ -114,6 +177,7 @@ >> //Applying Gaussian filter on the picture >> src = (pixel*)curFrame->m_fencPic->m_picOrg[0]; >> refPic = curFrame->m_gaussianPic + >> curFrame->m_fencPic->m_lumaMarginY * stride + >> curFrame->m_fencPic->m_lumaMarginX; >> + edgePic = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY * >> stride + curFrame->m_fencPic->m_lumaMarginX; >> pixel pixelValue = 0; >> >> for (int rowNum = 0; rowNum < height; rowNum++) >> @@ -146,51 +210,8 @@ >> } >> } >> >> -#if HIGH_BIT_DEPTH //10-bit build >> - float threshold = 1023; >> - pixel whitePixel = 1023; >> -#else >> - float threshold = 255; >> - pixel whitePixel = 255; >> -#endif >> -#define PI 3.14159265 >> - >> - float gradientH = 0, gradientV = 0, radians = 0, theta = 0; >> - float gradientMagnitude = 0; >> - pixel blackPixel = 0; >> - edgePic = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY * >> stride + curFrame->m_fencPic->m_lumaMarginX; >> - //Applying Sobel filter on the gaussian filtered picture >> - for (int rowNum = 0; rowNum < height; rowNum++) >> - { >> - for (int colNum = 0; colNum < width; colNum++) >> - { >> - edgeTheta[(rowNum*stride) + colNum] = 0; >> - if ((rowNum != 0) && (colNum != 0) && (rowNum != height - 1) >> && (colNum != width - 1)) //Ignoring the border pixels of the picture >> - { >> - /*Horizontal and vertical gradients >> - [ -3 0 3 ] [-3 -10 -3 ] >> - gH = [ -10 0 10] gV = [ 0 0 0 ] >> - [ -3 0 3 ] [ 3 10 3 ]*/ >> - >> - const intptr_t rowOne = (rowNum - 1)*stride, colOne = >> colNum -1; >> - const intptr_t rowTwo = rowNum * stride, colTwo = colNum; >> - const intptr_t rowThree = (rowNum + 1)*stride, colThree >> = colNum + 1; >> - const intptr_t index = (rowNum*stride) + colNum; >> - >> - gradientH = (float)(-3 * refPic[rowOne + colOne] + 3 * >> refPic[rowOne + 
colThree] - 10 * refPic[rowTwo + colOne] + 10 * >> refPic[rowTwo + colThree] - 3 * refPic[rowThree + colOne] + 3 * >> refPic[rowThree + colThree]); >> - gradientV = (float)(-3 * refPic[rowOne + colOne] - 10 * >> refPic[rowOne + colTwo] - 3 * refPic[rowOne + colThree] + 3 * >> refPic[rowThree + colOne] + 10 * refPic[rowThree + colTwo] + 3 * >> refPic[rowThree + colThree]); >> - >> - gradientMagnitude = sqrtf(gradientH * gradientH + >> gradientV * gradientV); >> - radians = atan2(gradientV, gradientH); >> - theta = (float)((radians * 180) / PI); >> - if (theta < 0) >> - theta = 180 + theta; >> - edgeTheta[(rowNum*stride) + colNum] = (pixel)theta; >> - >> - edgePic[index] = gradientMagnitude >= threshold ? >> whitePixel : blackPixel; >> - } >> - } >> - } >> + if(!computeEdge(edgePic, refPic, edgeTheta, stride, height, width, >> true)) >> + x265_log(NULL, X265_LOG_ERROR, "Failed edge computation!"); >> } >> >> //Find the angle of a block by averaging the pixel angles >> @@ -1471,7 +1492,7 @@ >> >> if (m_lastNonB && !m_param->rc.bStatRead && >> ((m_param->bFrameAdaptive && m_param->bframes) || >> - m_param->rc.cuTree || m_param->scenecutThreshold || >> + m_param->rc.cuTree || m_param->scenecutThreshold || >> m_param->bHistBasedSceneCut || >> (m_param->lookaheadDepth && m_param->rc.vbvBufferSize))) >> { >> slicetypeAnalyse(frames, false); >> @@ -1971,10 +1992,15 @@ >> >> int numBFrames = 0; >> int numAnalyzed = numFrames; >> - bool isScenecut = scenecut(frames, 0, 1, true, origNumFrames); >> + bool isScenecut = false; >> >> /* When scenecut threshold is set, use scenecut detection for I >> frame placements */ >> - if (m_param->scenecutThreshold && isScenecut) >> + if (m_param->scenecutThreshold) >> + isScenecut = scenecut(frames, 0, 1, true, origNumFrames); >> + else if (m_param->bHistBasedSceneCut) >> + isScenecut = frames[1]->bScenecut; >> + >> + if (isScenecut) >> { >> frames[1]->sliceType = X265_TYPE_I; >> return; >> @@ -1985,14 +2011,17 @@ >> m_extendGopBoundary = 
false; >> for (int i = m_param->bframes + 1; i < origNumFrames; i += >> m_param->bframes + 1) >> { >> - scenecut(frames, i, i + 1, true, origNumFrames); >> + if (m_param->scenecutThreshold) >> + scenecut(frames, i, i + 1, true, origNumFrames); >> + >> for (int j = i + 1; j <= X265_MIN(i + m_param->bframes + 1, >> origNumFrames); j++) >> { >> - if (frames[j]->bScenecut && scenecutInternal(frames, j - >> 1, j, true) ) >> - { >> - m_extendGopBoundary = true; >> - break; >> - } >> + if (( m_param->scenecutThreshold && frames[j]->bScenecut >> && scenecutInternal(frames, j - 1, j, true)) || >> + (m_param->bHistBasedSceneCut && >> frames[j]->bScenecut)) >> + { >> + m_extendGopBoundary = true; >> + break; >> + } >> } >> if (m_extendGopBoundary) >> break; >> @@ -2097,13 +2126,14 @@ >> { >> for (int j = 1; j < numBFrames + 1; j++) >> { >> - if (scenecut(frames, j, j + 1, false, origNumFrames) || >> + if ((m_param->scenecutThreshold && scenecut(frames, j, j >> + 1, false, origNumFrames)) || >> + (m_param->bHistBasedSceneCut && frames[j + >> 1]->bScenecut) || >> (bForceRADL && (frames[j]->frameNum == preRADL))) >> - { >> - frames[j]->sliceType = X265_TYPE_P; >> - numAnalyzed = j; >> - break; >> - } >> + { >> + frames[j]->sliceType = X265_TYPE_P; >> + numAnalyzed = j; >> + break; >> + } >> } >> } >> resetStart = bKeyframe ? 
1 : X265_MIN(numBFrames + 2, >> numAnalyzed + 1); >> @@ -3289,3 +3319,5 @@ >> fenc->rowSatds[b - p0][p1 - b][cuY] += bcostAq; >> fenc->lowresCosts[b - p0][p1 - b][cuXY] = (uint16_t)(X265_MIN(bcost, >> LOWRES_COST_MASK) | (listused << LOWRES_COST_SHIFT)); >> } >> + >> +} >> diff -r 04db2bfee5d6 -r 97a9eca413d8 source/encoder/slicetype.h >> --- a/source/encoder/slicetype.h Thu Oct 31 16:23:27 2019 +0530 >> +++ b/source/encoder/slicetype.h Wed Nov 13 18:18:31 2019 +0530 >> @@ -43,6 +43,13 @@ >> #define AQ_EDGE_BIAS 0.5 >> #define EDGE_INCLINATION 45 >> >> +#ifdef HIGH_BIT_DEPTH >> +#define edgeThreshold 1023.0 >> +#else >> +#define edgeThreshold 255.0 >> +#endif >> +#define PI 3.14159265 >> + >> /* Thread local data for lookahead tasks */ >> struct LookaheadTLD >> { >> @@ -258,6 +265,7 @@ >> CostEstimateGroup& operator=(const CostEstimateGroup&); >> }; >> >> -} >> +bool computeEdge(pixel *edgePic, pixel *refPic, pixel *edgeTheta, >> intptr_t stride, int height, int width, bool bcalcTheta); >> >> +} >> #endif // ifndef X265_SLICETYPE_H >> diff -r 04db2bfee5d6 -r 97a9eca413d8 source/test/regression-tests.txt >> --- a/source/test/regression-tests.txt Thu Oct 31 16:23:27 2019 +0530 >> +++ b/source/test/regression-tests.txt Wed Nov 13 18:18:31 2019 +0530 >> @@ -159,6 +159,8 @@ >> Traffic_4096x2048_30p.y4m, --preset medium --frame-dup --dup-threshold >> 60 --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000 >> Kimono1_1920x1080_24_400.yuv,--preset superfast --qp 28 --zones >> 0,139,q=32 >> Island_960x540_420p_8bit_24fps.yuv,--no-cutree --aq-mode 0 --bitrate >> 6000 --scenecut-aware-qp >> +sintel_trailer_2k_1920x1080_24.yuv, --preset medium --hist-scenecut >> --hist-threshold 0.02 --frame-dup --dup-threshold 60 --hrd --bitrate 10000 >> --vbv-bufsize 15000 --vbv-maxrate 12000 >> +sintel_trailer_2k_1920x1080_24.yuv, --preset medium --hist-scenecut >> --hist-threshold 0.02 >> >> # Main12 intraCost overflow bug test >> 720p50_parkrun_ter.y4m,--preset medium >> diff -r 
04db2bfee5d6 -r 97a9eca413d8 source/x265.h >> --- a/source/x265.h Thu Oct 31 16:23:27 2019 +0530 >> +++ b/source/x265.h Wed Nov 13 18:18:31 2019 +0530 >> @@ -1024,7 +1024,8 @@ >> int lookaheadSlices; >> >> /* An arbitrary threshold which determines how aggressively the >> lookahead >> - * should detect scene cuts. The default (40) is recommended. */ >> + * should detect scene cuts for cost based scenecut detection. >> + * The default (40) is recommended. */ >> int scenecutThreshold; >> >> /* Replace keyframes by using a column of intra blocks that move >> across the video >> @@ -1839,14 +1840,24 @@ >> * Default is disabled. */ >> int bEnableSceneCutAwareQp; >> >> - /*The duration(in milliseconds) for which there is a reduction in >> the bits spent on the inter-frames after a scenecut >> + /* The duration(in milliseconds) for which there is a reduction in >> the bits spent on the inter-frames after a scenecut >> * by increasing their QP, when bEnableSceneCutAwareQp is set. >> Default is 500ms.*/ >> int scenecutWindow; >> >> /* The offset by which QP is incremented for inter-frames when >> bEnableSceneCutAwareQp is set. >> * Default is +5. */ >> int maxQpDelta; >> + >> + /* A genuine threshold used for histogram based scene cut detection. >> + * This threshold determines whether a frame is a scenecut or not >> + * when compared against the edge and chroma histogram sad values. >> + * Default 0.01. Range: Real number in the interval (0,2). */ >> + double edgeTransitionThreshold; >> + >> + /* Enables histogram based scenecut detection algorithm to detect >> scenecuts. Default disabled */ >> + int bHistBasedSceneCut; >> } x265_param; >> + >> /* x265_param_alloc: >> * Allocates an x265_param instance. 
The returned param structure is not >> * special in any way, but using this method together with >> x265_param_free() >> diff -r 04db2bfee5d6 -r 97a9eca413d8 source/x265cli.h >> --- a/source/x265cli.h Thu Oct 31 16:23:27 2019 +0530 >> +++ b/source/x265cli.h Wed Nov 13 18:18:31 2019 +0530 >> @@ -129,6 +129,9 @@ >> { "scenecut", required_argument, NULL, 0 }, >> { "no-scenecut", no_argument, NULL, 0 }, >> { "scenecut-bias", required_argument, NULL, 0 }, >> + { "hist-scenecut", no_argument, NULL, 0}, >> + { "no-hist-scenecut", no_argument, NULL, 0}, >> + { "hist-threshold", required_argument, NULL, 0}, >> { "fades", no_argument, NULL, 0 }, >> { "no-fades", no_argument, NULL, 0 }, >> { "scenecut-aware-qp", no_argument, NULL, 0 }, >> @@ -489,7 +492,10 @@ >> H0(" --gop-lookahead <integer> Extends gop boundary if a >> scenecut is found within this from keyint boundary. Default 0\n"); >> H0(" --no-scenecut Disable adaptive I-frame >> decision\n"); >> H0(" --scenecut <integer> How aggressively to insert >> extra I-frames. Default %d\n", param->scenecutThreshold); >> - H1(" --scenecut-bias <0..100.0> Bias for scenecut detection. >> Default %.2f\n", param->scenecutBias); >> + H1(" --scenecut-bias <0..100.0> Bias for scenecut detection. >> Default %.2f\n", param->scenecutBias); >> + H0(" --hist-scenecut Enables histogram based >> scene-cut detection using histogram based algorithm.\n"); >> + H0(" --no-hist-scenecut Disables histogram based >> scene-cut detection using histogram based algorithm.\n"); >> + H1(" --hist-threshold <0.0..2.0> Luma Edge histogram's >> Normalized SAD threshold for histogram based scenecut detection Default >> %.2f\n", param->edgeTransitionThreshold); >> H0(" --[no-]fades Enable detection and handling >> of fade-in regions. Default %s\n", OPT(param->bEnableFades)); >> H1(" --[no-]scenecut-aware-qp Enable increasing QP for frames >> inside the scenecut window after scenecut. 
Default %s\n", >> OPT(param->bEnableSceneCutAwareQp)); >> H1(" --scenecut-window <0..1000> QP incremental duration(in >> milliseconds) when scenecut-aware-qp is enabled. Default %d\n", >> param->scenecutWindow); >> >> -- >> *With Regards,* >> *Srikanth Kurapati.* >> _______________________________________________ >> x265-devel mailing list >> [email protected] >> https://mailman.videolan.org/listinfo/x265-devel >> > _______________________________________________ > x265-devel mailing list > [email protected] > https://mailman.videolan.org/listinfo/x265-devel > -- Regards, *Aruna Matheswaran,* Video Codec Engineer, Media & AI analytics BU,
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
