# HG changeset patch # User gopi Satykrishna Akisetty # Date 1571121977 -19800 # Tue Oct 15 12:16:17 2019 +0530 # Node ID deaecadc43060ba37a85d9724a1a306a86433432 # Parent 37648fca915b389bafe923d8443818359e80ebf2 Improved scenecut detection
This patch does the following. 1. Identifies scenecuts by thresholding the SAD of edge and chroma histograms. 2. Adds options "--hist-scenecut" and "--hist-threshold" to enable the improved scenecut method for slice type decisions and rate control, and to set a threshold for determining scenecuts. 3. Removes duplicate edge-filter code and creates a global function for use in scenecut detection and AQ in Lookahead. diff -r 37648fca915b -r deaecadc4306 doc/reST/cli.rst --- a/doc/reST/cli.rst Fri Oct 11 12:45:52 2019 +0530 +++ b/doc/reST/cli.rst Tue Oct 15 12:16:17 2019 +0530 @@ -1427,6 +1427,18 @@ intra cost of a frame used in scenecut detection. For example, a value of 5 indicates, if the inter cost of a frame is greater than or equal to 95 percent of the intra cost of the frame, then detect this frame as scenecut. Values between 5 and 15 are recommended. Default 5. + +.. option:: --hist-scenecut, --no-hist-scenecut + + Indicates that I-frames need to be inserted using the edge and color histogram based scenecut algorithm. + :option:`--hist-scenecut` enables adaptive I-frame placement using this method and disables the default scenecut algorithm. + :option:`--no-hist-scenecut` disables histogram-based adaptive I-frame placement. + +.. option:: --hist-threshold <0.0..2.0> + + This value represents the threshold for the SAD of edge histograms used in scenecut detection. This requires hist-scenecut to be enabled. + For example, a value of 0.2 indicates that a frame whose SAD value against the previous frame is greater than 0.2 is detected as a scenecut. + Values between 0.0 and 2.0 are recommended. Default 0.01. .. 
option:: --radl <integer> diff -r 37648fca915b -r deaecadc4306 source/CMakeLists.txt --- a/source/CMakeLists.txt Fri Oct 11 12:45:52 2019 +0530 +++ b/source/CMakeLists.txt Tue Oct 15 12:16:17 2019 +0530 @@ -29,7 +29,7 @@ option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF) mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD) # X265_BUILD must be incremented each time the public API is changed -set(X265_BUILD 180) +set(X265_BUILD 181) configure_file("${PROJECT_SOURCE_DIR}/x265.def.in" "${PROJECT_BINARY_DIR}/x265.def") configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in" diff -r 37648fca915b -r deaecadc4306 source/common/CMakeLists.txt --- a/source/common/CMakeLists.txt Fri Oct 11 12:45:52 2019 +0530 +++ b/source/common/CMakeLists.txt Tue Oct 15 12:16:17 2019 +0530 @@ -151,4 +151,5 @@ predict.cpp predict.h scalinglist.cpp scalinglist.h quant.cpp quant.h contexts.h - deblock.cpp deblock.h) + deblock.cpp deblock.h + scenecut.h scenecut.cpp) diff -r 37648fca915b -r deaecadc4306 source/common/common.h --- a/source/common/common.h Fri Oct 11 12:45:52 2019 +0530 +++ b/source/common/common.h Tue Oct 15 12:16:17 2019 +0530 @@ -129,14 +129,20 @@ typedef uint64_t sum2_t; typedef uint64_t pixel4; typedef int64_t ssum2_t; +#define HISTOGRAM_SIZE 1024 +#define SHIFT 1 #else typedef uint8_t pixel; typedef uint16_t sum_t; typedef uint32_t sum2_t; typedef uint32_t pixel4; typedef int32_t ssum2_t; // Signed sum +#define HISTOGRAM_SIZE 256 +#define SHIFT 0 #endif // if HIGH_BIT_DEPTH +#define PI 3.14159265 + #if X265_DEPTH < 10 typedef uint32_t sse_t; #else diff -r 37648fca915b -r deaecadc4306 source/common/param.cpp --- a/source/common/param.cpp Fri Oct 11 12:45:52 2019 +0530 +++ b/source/common/param.cpp Tue Oct 15 12:16:17 2019 +0530 @@ -167,6 +167,8 @@ param->bFrameAdaptive = X265_B_ADAPT_TRELLIS; param->bBPyramid = 1; param->scenecutThreshold = 40; /* Magic number pulled in from x264 */ + param->edgeTransitionThreshold = 0.01; + 
param->bHistbasedScenecut = false; param->lookaheadSlices = 8; param->lookaheadThreads = 0; param->scenecutBias = 5.0; @@ -567,6 +569,7 @@ param->bframes = 0; param->lookaheadDepth = 0; param->scenecutThreshold = 0; + param->bHistbasedScenecut = false; param->rc.cuTree = 0; param->frameNumThreads = 1; } @@ -609,7 +612,7 @@ return 0; } -static int x265_atobool(const char* str, bool& bError) +static bool x265_atobool(const char* str, bool& bError) { if (!strcmp(str, "1") || !strcmp(str, "true") || @@ -920,6 +923,7 @@ { bError = false; p->scenecutThreshold = atoi(value); + p->bHistbasedScenecut = false; } } OPT("temporal-layers") p->bEnableTemporalSubLayers = atobool(value); @@ -1186,6 +1190,31 @@ OPT("opt-ref-list-length-pps") p->bOptRefListLengthPPS = atobool(value); OPT("multi-pass-opt-rps") p->bMultiPassOptRPS = atobool(value); OPT("scenecut-bias") p->scenecutBias = atof(value); + OPT("hist-scenecut") + { + p->bHistbasedScenecut = atobool(value); + + if (bError) + { + bError = false; + p->bHistbasedScenecut = false; + } + + if (p->bHistbasedScenecut) { + bError = false; + p->scenecutThreshold = 0; + } + + } + OPT("hist-threshold") { + p->edgeTransitionThreshold = atof(value); + if (bError) + { + bError = false; + p->edgeTransitionThreshold = 0.01; + x265_log(p, X265_LOG_INFO, "using default threshold %.2lf for scene cut detection\n", p->edgeTransitionThreshold); + } + } OPT("lookahead-threads") p->lookaheadThreads = atoi(value); OPT("opt-cu-delta-qp") p->bOptCUDeltaQP = atobool(value); OPT("multi-pass-opt-analysis") p->analysisMultiPassRefine = atobool(value); @@ -1623,8 +1652,14 @@ "Valid Logging level -1:none 0:error 1:warning 2:info 3:debug 4:full"); CHECK(param->scenecutThreshold < 0, "scenecutThreshold must be greater than 0"); - CHECK(param->scenecutBias < 0 || 100 < param->scenecutBias, - "scenecut-bias must be between 0 and 100"); + if (param->scenecutThreshold) { + CHECK(param->scenecutBias < 0 || 100 < param->scenecutBias, + "scenecut-bias must be 
between 0 and 100"); + } + else if (param->bHistbasedScenecut) { + CHECK(param->edgeTransitionThreshold < 0.0 || 2.0 < param->edgeTransitionThreshold, + "hist-threshold must be between 0.0 and 2.0"); + } CHECK(param->radl < 0 || param->radl > param->bframes, "radl must be between 0 and bframes"); CHECK(param->rdPenalty < 0 || param->rdPenalty > 2, @@ -1780,10 +1815,20 @@ x265_log(param, X265_LOG_INFO, "ME / range / subpel / merge : %s / %d / %d / %d\n", x265_motion_est_names[param->searchMethod], param->searchRange, param->subpelRefine, param->maxNumMergeCand); - if (param->keyframeMax != INT_MAX || param->scenecutThreshold) - x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut / bias: %d / %d / %d / %.2lf\n", param->keyframeMin, param->keyframeMax, param->scenecutThreshold, param->scenecutBias * 100); + if (param->scenecutThreshold && param->keyframeMax != INT_MAX) + param->edgeTransitionThreshold = 0.0; + else if (param->bHistbasedScenecut && param->keyframeMax != INT_MAX) + param->scenecutBias = 0.0; + else if (param->keyframeMax != INT_MAX) { + param->edgeTransitionThreshold = 0.0; + param->scenecutBias = 0.0; + } + + if (param->keyframeMax == INT_MAX) + x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut : disabled\n"); else - x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut : disabled\n"); + x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut / bias / threshold : %d / %d / %d / %.2lf / %.2lf\n", param->keyframeMin, param->keyframeMax, ( param->bHistbasedScenecut || param->scenecutThreshold ), param->scenecutBias * 100, param->edgeTransitionThreshold); + if (param->cbQpOffset || param->crQpOffset) x265_log(param, X265_LOG_INFO, "Cb/Cr QP Offset : %d / %d\n", param->cbQpOffset, param->crQpOffset); @@ -1949,6 +1994,8 @@ s += sprintf(s, " rc-lookahead=%d", p->lookaheadDepth); s += sprintf(s, " lookahead-slices=%d", p->lookaheadSlices); s += sprintf(s, " scenecut=%d", p->scenecutThreshold); + s += sprintf(s, " 
hist-scenecut=%d", p->bHistbasedScenecut); + s += sprintf(s, " hist-threshold=%.2f", p->edgeTransitionThreshold); s += sprintf(s, " radl=%d", p->radl); BOOL(p->bEnableHRDConcatFlag, "splice"); BOOL(p->bIntraRefresh, "intra-refresh"); @@ -2096,6 +2143,8 @@ BOOL(p->bOptRefListLengthPPS, "opt-ref-list-length-pps"); BOOL(p->bMultiPassOptRPS, "multi-pass-opt-rps"); s += sprintf(s, " scenecut-bias=%.2f", p->scenecutBias); + s += sprintf(s, " hist-threshold=%.2f", p->edgeTransitionThreshold); + BOOL(p->bOptCUDeltaQP, "opt-cu-delta-qp"); BOOL(p->bAQMotion, "aq-motion"); BOOL(p->bEmitHDRSEI, "hdr"); @@ -2246,6 +2295,7 @@ dst->lookaheadSlices = src->lookaheadSlices; dst->lookaheadThreads = src->lookaheadThreads; dst->scenecutThreshold = src->scenecutThreshold; + dst->bHistbasedScenecut = src->bHistbasedScenecut; dst->bIntraRefresh = src->bIntraRefresh; dst->maxCUSize = src->maxCUSize; dst->minCUSize = src->minCUSize; @@ -2403,6 +2453,7 @@ dst->bOptRefListLengthPPS = src->bOptRefListLengthPPS; dst->bMultiPassOptRPS = src->bMultiPassOptRPS; dst->scenecutBias = src->scenecutBias; + dst->edgeTransitionThreshold = src->edgeTransitionThreshold; dst->gopLookahead = src->lookaheadDepth; dst->bOptCUDeltaQP = src->bOptCUDeltaQP; dst->analysisMultiPassDistortion = src->analysisMultiPassDistortion; diff -r 37648fca915b -r deaecadc4306 source/common/scenecut.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/source/common/scenecut.cpp Tue Oct 15 12:16:17 2019 +0530 @@ -0,0 +1,674 @@ +#include <iostream> +#include <sstream> +#include <vector> +#include <algorithm> +#include <math.h> +#include "encoder.h" +#include "scenecut.h" +#include "slicetype.h" + +using namespace std; +using namespace X265_NS; + +#define EDGE_PLANE_COUNT 1 + +namespace X265_NS { + +void computeEdge(pixel * edgePic, pixel *refPic, pixel * edgeTheta, intptr_t stride, int height, int width) + { + float gradientH = 0, gradientV = 0, radians = 0, theta = 0; + float gradientMagnitude = 0; + pixel blackPixel = 0; + + 
//Applying Sobel filter + for (int rowNum = 0; rowNum < height; rowNum++) + { + for (int colNum = 0; colNum < width; colNum++) + { + edgeTheta[(rowNum*stride) + colNum] = 0; + if ((rowNum != 0) && (colNum != 0) && (rowNum != height - 1) && (colNum != width - 1)) //Ignoring the border pixels of the picture + { + /*Horizontal and vertical gradients + [ -3 0 3 ] [-3 -10 -3 ] + gH = [ -10 0 10] gV = [ 0 0 0 ] + [ -3 0 3 ] [ 3 10 3 ]*/ + + const intptr_t rowOne = (rowNum - 1)*stride, colOne = colNum - 1; + const intptr_t rowTwo = rowNum * stride, colTwo = colNum; + const intptr_t rowThree = (rowNum + 1)*stride, colThree = colNum + 1; + const intptr_t index = (rowNum*stride) + colNum; + + gradientH = (float)(-3 * refPic[rowOne + colOne] + 3 * refPic[rowOne + colThree] - 10 * refPic[rowTwo + colOne] + 10 * refPic[rowTwo + colThree] - 3 * refPic[rowThree + colOne] + 3 * refPic[rowThree + colThree]); + gradientV = (float)(-3 * refPic[rowOne + colOne] - 10 * refPic[rowOne + colTwo] - 3 * refPic[rowOne + colThree] + 3 * refPic[rowThree + colOne] + 10 * refPic[rowThree + colTwo] + 3 * refPic[rowThree + colThree]); + + gradientMagnitude = sqrtf(gradientH * gradientH + gradientV * gradientV); + radians = atan2(gradientV, gradientH); + theta = (float)((radians * 180) / PI); + if (theta < 0) + theta = 180 + theta; + edgeTheta[(rowNum*stride) + colNum] = (pixel)theta; + + edgePic[index] = (pixel)(gradientMagnitude >= edge_threshold ? 
whitePixel : blackPixel); + } + } + } + } + + Histogram::Histogram() + { + memset(frequency_distribution, 0, HISTOGRAM_SIZE * sizeof(int32_t)); + } + + Histogram::Histogram(Histogram const& hist) { + memset(frequency_distribution, 0, HISTOGRAM_SIZE * sizeof(int32_t)); + for (int i = 0; i < HISTOGRAM_SIZE; i++) { + frequency_distribution[i] = hist.frequency_distribution[i]; + } + } + + Histogram & Histogram::operator=(Histogram const& hist) + { + memset(frequency_distribution, 0, HISTOGRAM_SIZE * sizeof(int32_t)); + memcpy(frequency_distribution, hist.frequency_distribution, sizeof(int32_t)*HISTOGRAM_SIZE); + return *this; + } + + Histogram::~Histogram() {} + + YuvHistogram::YuvHistogram() {} + + void YuvHistogram::initHistograms(int32_t planecount) { + this->plane_count = planecount; + m_isalloc = false; + bisUpdated = false; + param = NULL; + + yuv_hist = edge_hist = NULL; + plane_sizes = plane_heights = plane_widths = NULL; + edgePic = edgeThetaPic = NULL; + + plane_sizes = X265_MALLOC(int32_t, plane_count); + plane_heights = X265_MALLOC(int32_t, plane_count); + plane_widths = X265_MALLOC(int32_t, plane_count); + + if (!plane_sizes || !plane_heights || !plane_widths) { + x265_log(param, X265_LOG_ERROR, "unable to allocate memory for plane dimensions\n"); + m_isalloc &= false; + } + else { + memset(plane_sizes, 0, plane_count * sizeof(int32_t)); + memset(plane_heights, 0, plane_count * sizeof(int32_t)); + memset(plane_widths, 0, plane_count * sizeof(int32_t)); + m_isalloc &= true; + } + + yuv_hist = X265_MALLOC(Histogram, plane_count); + edge_hist = X265_MALLOC(Histogram, plane_count); + + if (!yuv_hist || !edge_hist) { + m_isalloc &= false; + x265_log(param, X265_LOG_ERROR, "unable to allocate memory for histograms\n"); + } + + } + + void YuvHistogram::initHistograms(x265_param *p) { + param = p; + plane_count = x265_cli_csps[param->internalCsp].planes; + bisUpdated = false; + m_isalloc = false; + + yuv_hist = edge_hist = NULL; + plane_sizes = plane_heights = 
plane_widths = NULL; + edgePic = edgeThetaPic = NULL; + + plane_sizes = X265_MALLOC(int32_t, plane_count); + plane_heights = X265_MALLOC(int32_t, plane_count); + plane_widths = X265_MALLOC(int32_t, plane_count); + + if (!plane_sizes || !plane_heights || !plane_widths) { + x265_log(param, X265_LOG_ERROR, "unable to allocate memory for plane dimensions\n"); + m_isalloc &= false; + } + else { + memset(plane_sizes, 0, plane_count * sizeof(int32_t)); + memset(plane_heights, 0, plane_count * sizeof(int32_t)); + memset(plane_widths, 0, plane_count * sizeof(int32_t)); + m_isalloc &= true; + } + + yuv_hist = X265_MALLOC(Histogram, plane_count); + edge_hist = X265_MALLOC(Histogram, plane_count); + + if (!yuv_hist || !edge_hist) { + m_isalloc &= false; + x265_log(param, X265_LOG_ERROR, "unable to allocate memory for histograms\n"); + } + + } + + bool YuvHistogram::allocHistogramBuffers() { + //allocate memory for edge filter output and histograms + bool isalloc = true; + + edgePic = X265_MALLOC(pixel*, plane_count); + edgeThetaPic = X265_MALLOC(pixel*, plane_count); + + if (!edgePic || !edgeThetaPic) { + isalloc &= false; + x265_log(param, X265_LOG_ERROR, "unable to allocate memory for edge buffers\n"); + return isalloc; + } + + for (int i = 0; i < plane_count; i++) { + + edgePic[i] = edgeThetaPic[i] = NULL; + edgePic[i] = X265_MALLOC(pixel, plane_sizes[i]); + edgeThetaPic[i] = X265_MALLOC(pixel, plane_sizes[i]); + + if (edgePic[i] && edgeThetaPic[i]) { + memset(edgePic[i], 0, plane_sizes[i] * sizeof(pixel)); + memset(edgeThetaPic[i], 0, plane_sizes[i] * sizeof(pixel)); + isalloc &= true; + } + else + isalloc &= false; + } + return isalloc; + } + + YuvHistogram::~YuvHistogram() + { + freeHistogramBuffers(); //change implementation based on allocation changes + } + + YuvHistogram::YuvHistogram(YuvHistogram const& hist) { + + maxuv_hist = hist.maxuv_hist; + plane_count = hist.plane_count; + bisUpdated = hist.bisUpdated; + param = hist.param; + memcpy(plane_sizes, 
hist.plane_sizes, plane_count * sizeof(int32_t)); + memcpy(plane_heights, hist.plane_heights, plane_count * sizeof(int32_t)); + memcpy(plane_widths, hist.plane_widths, plane_count * sizeof(int32_t)); + memcpy(yuv_hist, hist.yuv_hist, plane_count * sizeof(Histogram)); + memcpy(edge_hist, hist.edge_hist, plane_count * sizeof(Histogram)); + + if (!m_isalloc) { + m_isalloc = false; + m_isalloc = allocHistogramBuffers(); + } + + if (m_isalloc) { + for (int i = 0; i < plane_count; i++) { + if (edgePic[i] && edgeThetaPic[i]) { + memcpy(edgePic[i], hist.edgePic[i], plane_sizes[i] * sizeof(pixel)); + memcpy(edgeThetaPic[i], hist.edgeThetaPic[i], plane_sizes[i] * sizeof(pixel)); + } + } + } + + } + + YuvHistogram & YuvHistogram ::operator=(const YuvHistogram & copy_hist) + { + maxuv_hist = copy_hist.maxuv_hist; + plane_count = copy_hist.plane_count; + bisUpdated = copy_hist.bisUpdated; + param = copy_hist.param; + memcpy(plane_sizes, copy_hist.plane_sizes, plane_count * sizeof(int32_t)); + memcpy(plane_heights, copy_hist.plane_heights, plane_count * sizeof(int32_t)); + memcpy(plane_widths, copy_hist.plane_widths, plane_count * sizeof(int32_t)); + memcpy(yuv_hist, copy_hist.yuv_hist, plane_count * sizeof(Histogram)); + memcpy(edge_hist, copy_hist.edge_hist, plane_count * sizeof(Histogram)); + + if (!m_isalloc) { + m_isalloc = false; + m_isalloc = allocHistogramBuffers(); + } + + if (m_isalloc) { + for (int i = 0; i < plane_count; i++) { + if (edgePic[i] && edgeThetaPic[i]) { + memcpy(edgePic[i], copy_hist.edgePic[i], plane_sizes[i] * sizeof(pixel)); + memcpy(edgeThetaPic[i], copy_hist.edgeThetaPic[i], plane_sizes[i] * sizeof(pixel)); + } + } + } + + return *this; + } + + void YuvHistogram::initFrameDimensions(x265_picture & pic) { + + for (int i = 0; i < plane_count; i++) { + plane_widths[i] = pic.width; + plane_heights[i] = pic.height >> x265_cli_csps[pic.colorSpace].height[i]; + plane_sizes[i] = plane_widths[i] * plane_heights[i]; + } + } + + void 
YuvHistogram::freeHistogramBuffers() { + //de allocate memory for histograms and edge filtered output + if (edgePic && edgeThetaPic) { + for (int i = 0; i < plane_count; i++) { + if (edgePic[i] && edgeThetaPic[i]) { + X265_FREE_ZERO(edgePic[i]); + X265_FREE_ZERO(edgeThetaPic[i]); + } + } + X265_FREE_ZERO(edgePic); + X265_FREE_ZERO(edgeThetaPic); + } + + if (plane_sizes && plane_heights && plane_widths) { + X265_FREE_ZERO(plane_sizes); + X265_FREE_ZERO(plane_heights); + X265_FREE_ZERO(plane_widths); + } + + if (yuv_hist && edge_hist) { + X265_FREE_ZERO(yuv_hist); + X265_FREE_ZERO(edge_hist); + } + } + + bool YuvHistogram::edgeFilter(x265_picture *frame) { + + if (!m_isalloc) { + initFrameDimensions(*frame); + m_isalloc = allocHistogramBuffers(); + } + + if (m_isalloc) { + for (int idx = 0; idx < EDGE_PLANE_COUNT; idx++) { + + memset(edgePic[idx], 0, sizeof(pixel) * plane_sizes[idx]); + memset(edgeThetaPic[idx], 0, sizeof(pixel) * plane_sizes[idx]); + + pixel *src = (pixel*)frame->planes[idx]; + pixel *edge_pic = edgePic[idx]; + pixel *ref_pic = src; + pixel *edge_theta = edgeThetaPic[idx]; + + assert(edge_pic != NULL); + assert(ref_pic != NULL); + memcpy(edge_pic, src, plane_sizes[idx] * sizeof(pixel)); + memcpy(ref_pic, src, plane_sizes[idx] * sizeof(pixel)); + + computeEdge(edge_pic, ref_pic, edge_theta,plane_widths[idx], plane_heights[idx], plane_widths[idx]); + } + return true; + } + else { + return false; + } + } + + bool YuvHistogram::computeHistograms(x265_picture &cur_frame) { + + bool bsuccess = false; + bsuccess = computeLumaEdgeHistogram(cur_frame); + if (bsuccess) { + if (plane_count > 1) { + bsuccess &= computeChromaHistogram(cur_frame); + } + return bsuccess; + } + else { + return bsuccess; + } + + } + + bool YuvHistogram::computeLumaEdgeHistogram(x265_picture &frame) { + + pixel pixel_val = 0; + + memset(edge_hist[0].frequency_distribution, 0, HISTOGRAM_SIZE * sizeof(int32_t)); + + int size = frame.height*(frame.stride[0] >> SHIFT); + + for (int i = 
0; i < size; i++) { + pixel_val = edgePic[0][i]; + edge_hist[0].frequency_distribution[pixel_val]++; + } + return true; + } + + bool YuvHistogram::computeChromaHistogram(x265_picture &frame) { + /*u hist calculation*/ + pixel pixel_val = 0; + int32_t pixel_ucount = 0, pixel_vcount = 0; + + int u_height = (frame.height >> x265_cli_csps[frame.colorSpace].height[1]); + int size = u_height * (frame.stride[1] >> SHIFT); + memset(yuv_hist[1].frequency_distribution, 0, HISTOGRAM_SIZE * sizeof(int32_t)); + + for (int i = 0; i < size; i++) { + pixel_val = *((pixel *)frame.planes[1] + i); + yuv_hist[1].frequency_distribution[pixel_val]++; + pixel_ucount++; + } + + /*v hist calculation for independent uv planes */ + + if (plane_count == 3) { + pixel_val = 0; + int v_height = (frame.height >> x265_cli_csps[frame.colorSpace].height[2]); + size = v_height * (frame.stride[2] >> SHIFT); + memset(yuv_hist[2].frequency_distribution, 0, HISTOGRAM_SIZE * sizeof(int32_t)); + + for (int i = 0; i < size; i++) { + pixel_val = *((pixel *)frame.planes[2] + i); + yuv_hist[2].frequency_distribution[pixel_val]++; + pixel_vcount++; + } + + for (int i = 0; i < HISTOGRAM_SIZE; i++) { + maxuv_hist.frequency_distribution[i] = max(yuv_hist[1].frequency_distribution[i], yuv_hist[2].frequency_distribution[i]); + } + + } + else { + maxuv_hist = yuv_hist[1]; //for two planes scenario + } + + return true; + } + + bool YuvHistogram::isUpdated() { + return bisUpdated; + } + + void YuvHistogram::setUpdateFlag(bool flag) { + bisUpdated = flag; + } + + bool YuvHistogram::getUpdateFlag() { + return bisUpdated; + } + + SadYuv::SadYuv() { } + + void SadYuv::initSadYuv(int planecount) { + this->plane_count = planecount; + sad_yuv = NULL; + psad_yuv = NULL; + sad_yuv = X265_MALLOC(int32_t, plane_count); + psad_yuv = X265_MALLOC(double, plane_count); + if (sad_yuv && psad_yuv) { + memset(sad_yuv, 0, plane_count * sizeof(int32_t)); + memset(psad_yuv, 0, sizeof(double) * plane_count); + } + } + + SadYuv & 
SadYuv::operator=(SadYuv const& sad_val) { + this->plane_count = sad_val.plane_count; + if (!sad_yuv && !psad_yuv) { + sad_yuv = NULL; + psad_yuv = NULL; + sad_yuv = X265_MALLOC(int32_t, plane_count); + psad_yuv = X265_MALLOC(double, plane_count); + if (sad_yuv && psad_yuv) { + memcpy(sad_yuv, sad_val.sad_yuv, plane_count * sizeof(int32_t)); + memcpy(psad_yuv, sad_val.psad_yuv, sizeof(double) * plane_count); + } + } + else { + if (sad_yuv) + memcpy(sad_yuv, sad_val.sad_yuv, plane_count * sizeof(int32_t)); + if (psad_yuv) + memcpy(psad_yuv, sad_val.psad_yuv, sizeof(double) * plane_count); + } + return *this; + } + + SadYuv::~SadYuv() { + if (sad_yuv && psad_yuv) { + X265_FREE(sad_yuv); + X265_FREE(psad_yuv); + } + } + + int sad_stats::frames_scanned=0; + int sad_stats::line_number=0; + + sad_stats::sad_stats(int planecount, double threshold) { + this->plane_count = planecount; + calculateThresholds(threshold); + allocateBuffers(); + } + + void sad_stats::calculateThresholds(double threshold) { + edge_hist_threshold = threshold; + strength_factor = 2.0; + chroma_hist_threshold = threshold * 10.0; + scaled_edge_threshold = edge_hist_threshold * strength_factor; + scaled_chroma_threshold = chroma_hist_threshold * strength_factor; + } + + void sad_stats::init() { + bscene_cut = NULL; + bdrop_frame = NULL; + sad_vals = NULL; + maxuv_sad_vals = NULL; + edge_sad_vals = NULL; + prev_hist = NULL; + } + + sad_stats::~sad_stats() { + releaseBuffers(); + } + + void sad_stats::allocateBuffers() { + + init(); + sad_vals = new SadYuv[DUP_BUFFER](); + maxuv_sad_vals = new SadYuv[DUP_BUFFER](); + edge_sad_vals = new SadYuv[DUP_BUFFER](); + prev_hist = new YuvHistogram(); + prev_hist->initHistograms(plane_count); + + for (int i = 0; i < DUP_BUFFER; i++) { + sad_vals[i].initSadYuv(plane_count); + maxuv_sad_vals[i].initSadYuv(plane_count); + edge_sad_vals[i].initSadYuv(plane_count); + } + + bscene_cut = new bool[DUP_BUFFER]; + bdrop_frame = new bool[DUP_BUFFER]; + + if (!sad_vals || 
!maxuv_sad_vals || !edge_sad_vals || !bscene_cut || !bdrop_frame) { + x265_log(NULL, X265_LOG_ERROR, "Heap Error !"); + exit(101); + } + else { + memset(bscene_cut, false, 2 * sizeof(bool)); + memset(bdrop_frame, false, 2 * sizeof(bool)); + } + + } + + void sad_stats::releaseBuffers() { + if (sad_vals && maxuv_sad_vals && edge_sad_vals && bscene_cut && bdrop_frame && prev_hist) { + delete[] sad_vals; + delete[] maxuv_sad_vals; + delete[] edge_sad_vals; + delete[] bscene_cut; + delete[] bdrop_frame; + delete prev_hist; + } + } + + bool sad_stats::computeSadValue(YuvHistogram *input_frames, int32_t* plane_sizes) { + + int32_t *yuv_sad_val = NULL, *edge_sad_val = NULL, *maxuv_sad_val = NULL; + + double *maxuv_normalized_sad = NULL, *yuv_norm_sad = NULL, *edge_normalized_sads = NULL; + + YuvHistogram * ref_hist = NULL, *cur_hist = NULL; + + /*inorder to process frames as per poc's updated by frame duplication */ + if (frames_scanned > 0) { + + if (!input_frames[0].isUpdated() && input_frames[1].isUpdated()) { + ref_hist = prev_hist; + cur_hist = input_frames + 1; + + yuv_sad_val = sad_vals[1].sad_yuv, + edge_sad_val = edge_sad_vals[1].sad_yuv, + maxuv_sad_val = maxuv_sad_vals[1].sad_yuv; + maxuv_normalized_sad = maxuv_sad_vals[1].psad_yuv, + yuv_norm_sad = sad_vals[1].psad_yuv, + edge_normalized_sads = edge_sad_vals[1].psad_yuv; + input_frames[1].setUpdateFlag(false); + + } + else if (input_frames[0].isUpdated() && input_frames[1].isUpdated()) { + ref_hist = prev_hist; + cur_hist = input_frames; + + yuv_sad_val = sad_vals[0].sad_yuv, + edge_sad_val = edge_sad_vals[0].sad_yuv, + maxuv_sad_val = maxuv_sad_vals[0].sad_yuv; + maxuv_normalized_sad = maxuv_sad_vals[0].psad_yuv, + yuv_norm_sad = sad_vals[0].psad_yuv, + edge_normalized_sads = edge_sad_vals[0].psad_yuv; + input_frames[0].setUpdateFlag(false); + + } + else if (input_frames[0].isUpdated() && !input_frames[1].isUpdated()) { + ref_hist = prev_hist; + cur_hist = input_frames; + + yuv_sad_val = sad_vals[0].sad_yuv, + 
edge_sad_val = edge_sad_vals[0].sad_yuv, + maxuv_sad_val = maxuv_sad_vals[0].sad_yuv; + maxuv_normalized_sad = maxuv_sad_vals[0].psad_yuv, + yuv_norm_sad = sad_vals[0].psad_yuv, + edge_normalized_sads = edge_sad_vals[0].psad_yuv; + input_frames[0].setUpdateFlag(false); + } + else { + return true; + } + } + else { + cur_hist = input_frames; + + yuv_sad_val = sad_vals[0].sad_yuv, + edge_sad_val = edge_sad_vals[0].sad_yuv, + maxuv_sad_val = maxuv_sad_vals[0].sad_yuv; + + maxuv_normalized_sad = maxuv_sad_vals[0].psad_yuv, + yuv_norm_sad = sad_vals[0].psad_yuv, + edge_normalized_sads = edge_sad_vals[0].psad_yuv; + input_frames[0].setUpdateFlag(false); + } + + if (frames_scanned == 0) { //first frame is scenecut by default no sad computation for the same. + + maxuv_sad_val[0] = 0; + maxuv_normalized_sad[0] = 0.0; + memset(yuv_sad_val, 0 , plane_count * sizeof(int32_t)); + memset(edge_sad_val, 0, plane_count * sizeof(int32_t)); + memset(edge_normalized_sads, 0, plane_count * sizeof(double)); + memset(yuv_norm_sad, 0, plane_count * sizeof(double)); + + } + else { + int32_t freq_diff[3]; + int32_t maxuv_freq_diff[1]; + int32_t edge_freq_diff[3]; + double color_probability_diff[3], edge_probability_diff[3]; + + memset(yuv_sad_val, 0, plane_count*sizeof(int32_t)); + memset(edge_sad_val, 0, plane_count*sizeof(int32_t)); + + memset(yuv_norm_sad, 0, plane_count * sizeof(double)); + memset(edge_normalized_sads, 0, plane_count * sizeof(double)); + memset(color_probability_diff, 0, plane_count * sizeof(double)); + memset(edge_probability_diff, 0, plane_count * sizeof(double)); + + maxuv_normalized_sad[0] = 0.0; + maxuv_sad_val[0] = 0; + + memset(freq_diff, 0, 3 * sizeof(int32_t)); + memset(maxuv_freq_diff, 0, sizeof(int32_t)); + memset(edge_freq_diff, 0, 3 * sizeof(int32_t)); + + for (int i = 0; i < plane_count; i++) { + { + for (int j = 0; j < HISTOGRAM_SIZE; j++) { + + if (i == 0 && plane_count >= 1) { + maxuv_freq_diff[i] = (abs(cur_hist->maxuv_hist.frequency_distribution[j] - 
ref_hist->maxuv_hist.frequency_distribution[j])); + maxuv_sad_val[i] += maxuv_freq_diff[i]; + maxuv_normalized_sad[i] += (double)maxuv_freq_diff[i] / plane_sizes[i]; + edge_freq_diff[i] = abs(cur_hist->edge_hist[i].frequency_distribution[j] - ref_hist->edge_hist[i].frequency_distribution[j]); + edge_probability_diff[i] = double(edge_freq_diff[i]) / plane_sizes[i]; + edge_sad_val[i] += edge_freq_diff[i]; + edge_normalized_sads[i] += edge_probability_diff[i]; + } + else { + freq_diff[i] = abs(cur_hist->yuv_hist[i].frequency_distribution[j] - ref_hist->yuv_hist[i].frequency_distribution[j]); + color_probability_diff[i] = (double)freq_diff[i] / plane_sizes[i]; + yuv_sad_val[i] += freq_diff[i]; + yuv_norm_sad[i] += color_probability_diff[i]; + } + + } + + } + } + + } + + *prev_hist = *cur_hist; + + frames_scanned++; + + return true; + } + + void sad_stats::findSceneCuts(x265_picture * picList, bool& bdup) { + + if (frames_scanned == 1) { + //for first frame + bscene_cut[0] = true; + bdrop_frame[0] = false; + picList->analysisData.bScenecut = (int)getSceneCutflag(0); + bdup = getDropflag(0); + picList->analysisData.edgeSadValue = edge_sad_vals[0].psad_yuv[0]; + picList->analysisData.chromaSadValue = maxuv_sad_vals[0].psad_yuv[0]; + } + else { + bscene_cut[1] = bdrop_frame[1] = false; + if (edge_sad_vals[1].psad_yuv[0] == 0) { + bdrop_frame[1] = true; + } + else if (edge_sad_vals[1].psad_yuv[0] > edge_hist_threshold || maxuv_sad_vals[1].psad_yuv[0] >= chroma_hist_threshold) { + bscene_cut[1] = true; + bdrop_frame[1] = false; + } + else if (edge_sad_vals[1].psad_yuv[0] > scaled_edge_threshold || maxuv_sad_vals[1].psad_yuv[0] >= scaled_chroma_threshold) { + bscene_cut[1] = true; + bdrop_frame[1] = false; + } + picList->analysisData.bScenecut = (int)getSceneCutflag(1); + bdup = getDropflag(1); + picList->analysisData.edgeSadValue = edge_sad_vals[1].psad_yuv[0]; + picList->analysisData.chromaSadValue = maxuv_sad_vals[1].psad_yuv[0]; + } + } + + bool sad_stats::getDropflag(int 
i) { + return bdrop_frame[i]; + } + + bool sad_stats::getSceneCutflag(int i) { + return bscene_cut[i]; + } + +} \ No newline at end of file diff -r 37648fca915b -r deaecadc4306 source/common/scenecut.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/source/common/scenecut.h Tue Oct 15 12:16:17 2019 +0530 @@ -0,0 +1,148 @@ +#ifndef SCENECUT_H +#define SCENECUT_H + +#include <string> +#include <iostream> +#include <sstream> +#include <vector> +#include <algorithm> +#include <math.h> + +#include "yuv.h" +#include "common.h" + +/* Constants for the edge map written by computeEdge(): a gradient magnitude >= edge_threshold is stored as whitePixel. HIGH_BIT_DEPTH is tested by value (#if), as in common.h. */ +#if HIGH_BIT_DEPTH +#define edge_threshold 1023.0 +#define whitePixel 1023.0 +#else +#define edge_threshold 255.0 +#define whitePixel 255.0 +#endif + +using namespace std; + +namespace X265_NS { + + class Histogram { + + public: + int32_t frequency_distribution[HISTOGRAM_SIZE]; + + Histogram(); + + Histogram(Histogram const& hist); + + Histogram & operator=(Histogram const& hist); + + ~Histogram(); + + }; + + class YuvHistogram { + public: + Histogram *yuv_hist; + Histogram *edge_hist; + int32_t *plane_sizes; + int32_t *plane_heights; + int32_t *plane_widths; + + Histogram maxuv_hist; + int32_t plane_count; + bool bisUpdated; + + pixel** edgePic; + pixel** edgeThetaPic; + + x265_param * param; /*for handling various color spaces*/ + bool m_isalloc; + + YuvHistogram(); + + void initHistograms(int32_t plane_count); + + void initHistograms(x265_param *p); + + bool allocHistogramBuffers(); + + YuvHistogram(YuvHistogram const& hist); + + YuvHistogram & operator=(const YuvHistogram & copy_hist); + + ~YuvHistogram(); + + void initFrameDimensions(x265_picture & pic); + + void freeHistogramBuffers(); + + bool edgeFilter(x265_picture *frame); + + bool computeHistograms(x265_picture &cur_frame); + + bool computeLumaEdgeHistogram(x265_picture &frame); + + bool computeChromaHistogram(x265_picture &frame); + + bool isUpdated(); + + void setUpdateFlag(bool flag); + + bool getUpdateFlag(); + + }; + + struct SadYuv { + int32_t *sad_yuv; + double 
*psad_yuv; + int plane_count; + ~SadYuv(); + SadYuv(); + void initSadYuv(int plane_count); + SadYuv & operator=(SadYuv const& sad_val); +}; + + class sad_stats { + + bool *bscene_cut; + bool *bdrop_frame; + SadYuv * sad_vals; + SadYuv * maxuv_sad_vals; + SadYuv * edge_sad_vals; + int plane_count; + static int line_number; + static int frames_scanned; + YuvHistogram *prev_hist; + double edge_hist_threshold; + double chroma_hist_threshold; + double scaled_chroma_threshold; + double scaled_edge_threshold; + double strength_factor; + + public: + sad_stats(int plane_count, double threshold); + + ~sad_stats(); + + void init(); + + void allocateBuffers(); + + void releaseBuffers(); + + void calculateThresholds(double threshold); + + bool computeSadValue(YuvHistogram *frames, int32_t* plane_sizes); + + void findSceneCuts(x265_picture * piclist,bool & bdup); + + bool getDropflag(int i); + + bool getSceneCutflag(int i); + + }; + +void computeEdge(pixel * edgePic, pixel *refPic, pixel * edgeTheta, intptr_t stride, int height, int width); + +} + +#endif diff -r 37648fca915b -r deaecadc4306 source/encoder/api.cpp --- a/source/encoder/api.cpp Fri Oct 11 12:45:52 2019 +0530 +++ b/source/encoder/api.cpp Tue Oct 15 12:16:17 2019 +0530 @@ -31,6 +31,7 @@ #include "nal.h" #include "bitcost.h" #include "svt.h" +#include "scenecut.h" #if ENABLE_LIBVMAF #include "libvmaf.h" @@ -117,7 +118,10 @@ x265_log(param, X265_LOG_INFO, "build info %s\n", PFX(build_info_str)); encoder = new Encoder; - + encoder->m_sad_stats = new sad_stats(x265_cli_csps[p->internalCsp].planes,param->edgeTransitionThreshold); + encoder->m_hist_of_adj_frames = new YuvHistogram[2]; + encoder->m_hist_of_adj_frames[0].initHistograms(p); + encoder->m_hist_of_adj_frames[1].initHistograms(p); #ifdef SVT_HEVC if (param->bEnableSvtHevc) @@ -809,6 +813,7 @@ CHECKED_MALLOC_ZERO(interData->ref, int32_t, analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir); } analysis->interData = interData; + analysis->bScenecut = 
false; return; @@ -924,6 +929,7 @@ pic->rpu.payloadSize = 0; pic->rpu.payload = NULL; pic->picStruct = 0; + pic->bufUpdated = false; if ((param->analysisSave || param->analysisLoad) || (param->bAnalysisType == AVC_INFO)) { @@ -933,7 +939,9 @@ uint32_t numCUsInFrame = widthInCU * heightInCU; pic->analysisData.numCUsInFrame = numCUsInFrame; pic->analysisData.numPartitions = param->num4x4Partitions; + pic->analysisData.bScenecut = false; } + } void x265_picture_free(x265_picture *p) @@ -955,7 +963,8 @@ { if (param && param->rc.zonefileCount) { for (int i = 0; i < param->rc.zonefileCount; i++) - x265_free(param->rc.zones[i].zoneParam); + if(param->rc.zones[i].zoneParam) + x265_free(param->rc.zones[i].zoneParam); } if (param && (param->rc.zoneCount || param->rc.zonefileCount)) x265_free(param->rc.zones); diff -r 37648fca915b -r deaecadc4306 source/encoder/encoder.cpp --- a/source/encoder/encoder.cpp Fri Oct 11 12:45:52 2019 +0530 +++ b/source/encoder/encoder.cpp Tue Oct 15 12:16:17 2019 +0530 @@ -119,6 +119,9 @@ m_frameEncoder[i] = NULL; for (uint32_t i = 0; i < DUP_BUFFER; i++) m_dupBuffer[i] = NULL; + + m_hist_of_adj_frames = NULL; + m_sad_stats = NULL; MotionEstimate::initScales(); #if ENABLE_HDR10_PLUS @@ -162,7 +165,9 @@ int rows = (p->sourceHeight + p->maxCUSize - 1) >> g_log2Size[p->maxCUSize]; int cols = (p->sourceWidth + p->maxCUSize - 1) >> g_log2Size[p->maxCUSize]; - if (m_param->bEnableFrameDuplication) + + + if (m_param->bEnableFrameDuplication || m_param->bHistbasedScenecut) { size_t framesize = 0; int pixelbytes = p->sourceBitDepth > 8 ? 
2 : 1; @@ -184,6 +189,7 @@ m_dupBuffer[i]->dupPlane = NULL; m_dupBuffer[i]->dupPlane = X265_MALLOC(char, framesize); m_dupBuffer[i]->dupPic->planes[0] = m_dupBuffer[i]->dupPlane; + m_dupBuffer[i]->bufUpdated = false; m_dupBuffer[i]->bOccupied = false; m_dupBuffer[i]->bDup = false; } @@ -820,7 +826,7 @@ m_exportedPic = NULL; } - if (m_param->bEnableFrameDuplication) + if (m_param->bEnableFrameDuplication || m_param->bHistbasedScenecut) { for (uint32_t i = 0; i < DUP_BUFFER; i++) { @@ -1280,6 +1286,33 @@ return psnrWeight = (psnrY * 6 + psnrU + psnrV) / 8; } +void Encoder::updateSceneCutAndFrameDuplicateFlags() { + /* SCD computation and drop flag*/ + for (int i = 0; i < DUP_BUFFER; i++) { + if (m_dupBuffer[i]->bufUpdated) { + m_hist_of_adj_frames[i].setUpdateFlag(true); + m_hist_of_adj_frames[i].edgeFilter(m_dupBuffer[i]->dupPic); + m_hist_of_adj_frames[i].computeHistograms(*m_dupBuffer[i]->dupPic); + m_sad_stats->computeSadValue(m_hist_of_adj_frames, m_hist_of_adj_frames->plane_sizes); + m_sad_stats->findSceneCuts(m_dupBuffer[i]->dupPic, m_dupBuffer[i]->bDup); + + if (m_dupBuffer[i]->dupPic->analysisData.bScenecut) { + x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d edge hist sad: %0.4lf maxuv hist sad: %0.4lf\n", + m_dupBuffer[i]->dupPic->poc,m_dupBuffer[i]->dupPic->analysisData.edgeSadValue,m_dupBuffer[i]->dupPic->analysisData.chromaSadValue); + } + + if (m_dupBuffer[1]->bufUpdated) + m_hist_of_adj_frames[0] = m_hist_of_adj_frames[1]; + } + } + + } + +/* TBD +- to be updated for missing parameters in case of re-use else where and improvised to copy constructor / assignment operator of x265 picture data structure. +- benefits avoid function and use language features appropriately. 
+*/ + void Encoder::copyPicture(x265_picture *dest, const x265_picture *src) { dest->poc = src->poc; @@ -1299,6 +1332,25 @@ memcpy(dest->planes[0], src->planes[0], src->framesize * sizeof(char)); dest->planes[1] = (char*)dest->planes[0] + src->stride[0] * src->height; dest->planes[2] = (char*)dest->planes[1] + src->stride[1] * (src->height >> x265_cli_csps[src->colorSpace].height[1]); + memcpy(&dest->analysisData, &src->analysisData, sizeof(src->analysisData)); + +} + +void Encoder::setPictureFlags(int idx) { + m_dupBuffer[idx]->bOccupied = true; + m_dupBuffer[idx]->bufUpdated = true; + m_dupBuffer[idx]->bDup = false; +} + +void Encoder::unsetPictureFlags(int idx) { + if (idx == 1) { + m_dupBuffer[idx]->bOccupied = false; + m_dupBuffer[idx]->bufUpdated = false; + m_dupBuffer[idx]->bDup = false; + } + else if (idx == 0) { + m_dupBuffer[idx]->bufUpdated = false; + } } /** @@ -1327,7 +1379,9 @@ const x265_picture* inputPic = NULL; static int written = 0, read = 0; bool dontRead = false; - + bool isScenecutEnabled = m_param->bHistbasedScenecut; + bool dropflag = false; + if (m_exportedPic) { if (!m_param->bUseAnalysisFile && m_param->analysisSave) @@ -1338,7 +1392,7 @@ } if ((pic_in && (!m_param->chunkEnd || (m_encodedFrameNum < m_param->chunkEnd))) || (m_param->bEnableFrameDuplication && !pic_in && (read < written))) { - if ((m_param->bEnableFrameDuplication && !pic_in && (read < written))) + if ((m_param->bEnableFrameDuplication && !pic_in && (read < written)) || (isScenecutEnabled && !pic_in && (read < written))) dontRead = true; else { @@ -1361,7 +1415,7 @@ } } - if (m_param->bEnableFrameDuplication) + if (m_param->bEnableFrameDuplication || isScenecutEnabled ) { double psnrWeight = 0; @@ -1372,6 +1426,11 @@ copyPicture(m_dupBuffer[0]->dupPic, pic_in); m_dupBuffer[0]->bOccupied = true; written++; + if (m_param->bHistbasedScenecut) { + setPictureFlags(0); + updateSceneCutAndFrameDuplicateFlags(); + unsetPictureFlags(0); + } return 0; } else if 
(!m_dupBuffer[1]->bOccupied) @@ -1379,31 +1438,54 @@ copyPicture(m_dupBuffer[1]->dupPic, pic_in); m_dupBuffer[1]->bOccupied = true; written++; + if (m_param->bHistbasedScenecut) { + setPictureFlags(1); + updateSceneCutAndFrameDuplicateFlags(); + unsetPictureFlags(1); + } } - psnrWeight = ComputePSNR(m_dupBuffer[0]->dupPic, m_dupBuffer[1]->dupPic, m_param); - - if (psnrWeight >= m_param->dupThreshold) - { - if (m_dupBuffer[0]->bDup) - { - m_dupBuffer[0]->dupPic->picStruct = tripling; - m_dupBuffer[0]->bDup = false; - read++; + if (m_param->bEnableFrameDuplication && m_param->bHistbasedScenecut) { + if (m_dupBuffer[1]->bDup == false && m_dupBuffer[1]->dupPic->analysisData.bScenecut == false) { + psnrWeight = ComputePSNR(m_dupBuffer[0]->dupPic, m_dupBuffer[1]->dupPic, m_param); + if (psnrWeight >= m_param->dupThreshold) + dropflag = true; } - else - { - m_dupBuffer[0]->dupPic->picStruct = doubling; - m_dupBuffer[0]->bDup = true; - m_dupBuffer[1]->bOccupied = false; - read++; - return 0; + else { + dropflag = true; } } - else if (m_dupBuffer[0]->bDup) + else if (m_param->bEnableFrameDuplication) { + psnrWeight = ComputePSNR(m_dupBuffer[0]->dupPic, m_dupBuffer[1]->dupPic, m_param); + if (psnrWeight >= m_param->dupThreshold) + dropflag = true; + } + + if (m_param->bEnableFrameDuplication) + { + if (dropflag) + { + if (m_dupBuffer[0]->bDup) + { + m_dupBuffer[0]->dupPic->picStruct = tripling; + m_dupBuffer[0]->bDup = false; + read++; + } + else + { + m_dupBuffer[0]->dupPic->picStruct = doubling; + m_dupBuffer[0]->bDup = true; + m_dupBuffer[1]->bOccupied = false; + read++; + return 0; + } + } + else if (m_dupBuffer[0]->bDup) m_dupBuffer[0]->bDup = false; - else - m_dupBuffer[0]->dupPic->picStruct = 0; + else + m_dupBuffer[0]->dupPic->picStruct = 0; + } + } if (read < written) @@ -1485,7 +1567,10 @@ inFrame->m_poc = ++m_pocLast; inFrame->m_userData = inputPic->userData; - inFrame->m_pts = inputPic->pts; + inFrame->m_pts = inputPic->pts; + if (m_param->bHistbasedScenecut) { + 
inFrame->m_lowres.bScenecut = inputPic->analysisData.bScenecut; + } inFrame->m_forceqp = inputPic->forceqp; inFrame->m_param = (m_reconfigure || m_reconfigureRc) ? m_latestParam : m_param; inFrame->m_picStruct = inputPic->picStruct; @@ -1613,7 +1698,7 @@ m_param->bUseRcStats = 0; } - if (m_param->bEnableFrameDuplication && ((read < written) || (m_dupBuffer[0]->dupPic->picStruct == tripling && (read <= written)))) + if ( (m_param->bEnableFrameDuplication || isScenecutEnabled) && ((read < written) || (m_dupBuffer[0]->dupPic->picStruct == tripling && (read <= written)))) { if (m_dupBuffer[0]->dupPic->picStruct == tripling) m_dupBuffer[0]->bOccupied = m_dupBuffer[1]->bOccupied = false; @@ -3162,6 +3247,7 @@ * adaptive I frame placement */ p->keyframeMax = INT_MAX; p->scenecutThreshold = 0; + p->bHistbasedScenecut = 0; } else if (p->keyframeMax <= 1) { @@ -3175,6 +3261,7 @@ p->lookaheadDepth = 0; p->bframes = 0; p->scenecutThreshold = 0; + p->bHistbasedScenecut = 0; p->bFrameAdaptive = 0; p->rc.cuTree = 0; p->bEnableWeightedPred = 0; @@ -3828,6 +3915,20 @@ m_param->searchMethod = m_param->hmeSearchMethod[2]; } } + + if (p->bHistbasedScenecut && p->scenecutThreshold) { + p->scenecutThreshold = 0; + p->bHistbasedScenecut = false; + x265_log(p, X265_LOG_WARNING, "Amibigious choice. 
disabling scene cut detection \n"); + } + else if (p->scenecutThreshold && p->edgeTransitionThreshold != 0.01) { + x265_log(p, X265_LOG_WARNING, "using scenecut-bias %d for scene cut detection\n",p->scenecutBias); + } + else if (p->bHistbasedScenecut && p->edgeTransitionThreshold == 0.0) { + p->edgeTransitionThreshold = 0.01; + x265_log(p, X265_LOG_INFO, "using default threshold %.2lf for scene cut detection\n", p->edgeTransitionThreshold); + } + } void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x265_picture* picIn, int paramBytes) diff -r 37648fca915b -r deaecadc4306 source/encoder/encoder.h --- a/source/encoder/encoder.h Fri Oct 11 12:45:52 2019 +0530 +++ b/source/encoder/encoder.h Tue Oct 15 12:16:17 2019 +0530 @@ -32,6 +32,8 @@ #include "nal.h" #include "framedata.h" #include "svt.h" +#include "scenecut.h" + #ifdef ENABLE_HDR10_PLUS #include "dynamicHDR10/hdr10plus.h" #endif @@ -154,6 +156,9 @@ //Flag to check whether the picture has duplicated. bool bDup; + + bool bufUpdated; + }; @@ -195,6 +200,9 @@ ThreadPool* m_threadPool; FrameEncoder* m_frameEncoder[X265_MAX_FRAME_THREADS]; + + YuvHistogram* m_hist_of_adj_frames; + sad_stats* m_sad_stats; DPB* m_dpb; Frame* m_exportedPic; FILE* m_analysisFileIn; @@ -279,6 +287,10 @@ if (m_prevTonemapPayload.payload != NULL) X265_FREE(m_prevTonemapPayload.payload); #endif + delete m_sad_stats; + m_sad_stats = NULL; + delete[] m_hist_of_adj_frames; + m_hist_of_adj_frames = NULL; }; void create(); @@ -349,6 +361,12 @@ void copyPicture(x265_picture *dest, const x265_picture *src); + void unsetPictureFlags(int index); + + void setPictureFlags(int index); + + void updateSceneCutAndFrameDuplicateFlags(); + void initRefIdx(); void analyseRefIdx(int *numRefIdx); void updateRefIdx(); @@ -364,6 +382,7 @@ void initSPS(SPS *sps); void initPPS(PPS *pps); }; + } #endif // ifndef X265_ENCODER_H diff -r 37648fca915b -r deaecadc4306 source/encoder/ratecontrol.cpp --- a/source/encoder/ratecontrol.cpp Fri Oct 
11 12:45:52 2019 +0530 +++ b/source/encoder/ratecontrol.cpp Tue Oct 15 12:16:17 2019 +0530 @@ -493,6 +493,7 @@ CMP_OPT_FIRST_PASS("open-gop", m_param->bOpenGOP); CMP_OPT_FIRST_PASS(" keyint", m_param->keyframeMax); CMP_OPT_FIRST_PASS("scenecut", m_param->scenecutThreshold); + CMP_OPT_FIRST_PASS("hist-threshold", m_param->edgeTransitionThreshold); CMP_OPT_FIRST_PASS("intra-refresh", m_param->bIntraRefresh); if (m_param->bMultiPassOptRPS) { @@ -1183,6 +1184,7 @@ m_param->rc.bStatRead = 0; m_param->bFrameAdaptive = 0; m_param->scenecutThreshold = 0; + m_param->bHistbasedScenecut = false; m_param->rc.cuTree = 0; if (m_param->bframes > 1) m_param->bframes = 1; @@ -2173,7 +2175,7 @@ if (m_isVbv && m_currentSatd > 0 && curFrame) { if (m_param->lookaheadDepth || m_param->rc.cuTree || - m_param->scenecutThreshold || + (m_param->scenecutThreshold || m_param->bHistbasedScenecut) || (m_param->bFrameAdaptive && m_param->bframes)) { /* Lookahead VBV: If lookahead is done, raise the quantizer as necessary diff -r 37648fca915b -r deaecadc4306 source/encoder/slicetype.cpp --- a/source/encoder/slicetype.cpp Fri Oct 11 12:45:52 2019 +0530 +++ b/source/encoder/slicetype.cpp Tue Oct 15 12:16:17 2019 +0530 @@ -30,6 +30,7 @@ #include "primitives.h" #include "lowres.h" #include "mv.h" +#include "scenecut.h" #include "slicetype.h" #include "motion.h" @@ -114,8 +115,8 @@ //Applying Gaussian filter on the picture src = (pixel*)curFrame->m_fencPic->m_picOrg[0]; refPic = curFrame->m_gaussianPic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX; + edgePic = pic1 + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX; pixel pixelValue = 0; - for (int rowNum = 0; rowNum < height; rowNum++) { for (int colNum = 0; colNum < width; colNum++) @@ -127,7 +128,8 @@ 1 [4 9 12 9 4] --- [5 12 15 12 5] 159 [4 9 12 9 4] - [2 4 5 4 2]*/ + [2 4 5 4 2] + */ const intptr_t rowOne = (rowNum - 2)*stride, colOne = colNum - 2; const intptr_t rowTwo = 
(rowNum - 1)*stride, colTwo = colNum - 1; @@ -145,52 +147,7 @@ } } } - -#if HIGH_BIT_DEPTH //10-bit build - float threshold = 1023; - pixel whitePixel = 1023; -#else - float threshold = 255; - pixel whitePixel = 255; -#endif -#define PI 3.14159265 - - float gradientH = 0, gradientV = 0, radians = 0, theta = 0; - float gradientMagnitude = 0; - pixel blackPixel = 0; - edgePic = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX; - //Applying Sobel filter on the gaussian filtered picture - for (int rowNum = 0; rowNum < height; rowNum++) - { - for (int colNum = 0; colNum < width; colNum++) - { - edgeTheta[(rowNum*stride) + colNum] = 0; - if ((rowNum != 0) && (colNum != 0) && (rowNum != height - 1) && (colNum != width - 1)) //Ignoring the border pixels of the picture - { - /*Horizontal and vertical gradients - [ -3 0 3 ] [-3 -10 -3 ] - gH = [ -10 0 10] gV = [ 0 0 0 ] - [ -3 0 3 ] [ 3 10 3 ]*/ - - const intptr_t rowOne = (rowNum - 1)*stride, colOne = colNum -1; - const intptr_t rowTwo = rowNum * stride, colTwo = colNum; - const intptr_t rowThree = (rowNum + 1)*stride, colThree = colNum + 1; - const intptr_t index = (rowNum*stride) + colNum; - - gradientH = (float)(-3 * refPic[rowOne + colOne] + 3 * refPic[rowOne + colThree] - 10 * refPic[rowTwo + colOne] + 10 * refPic[rowTwo + colThree] - 3 * refPic[rowThree + colOne] + 3 * refPic[rowThree + colThree]); - gradientV = (float)(-3 * refPic[rowOne + colOne] - 10 * refPic[rowOne + colTwo] - 3 * refPic[rowOne + colThree] + 3 * refPic[rowThree + colOne] + 10 * refPic[rowThree + colTwo] + 3 * refPic[rowThree + colThree]); - - gradientMagnitude = sqrtf(gradientH * gradientH + gradientV * gradientV); - radians = atan2(gradientV, gradientH); - theta = (float)((radians * 180) / PI); - if (theta < 0) - theta = 180 + theta; - edgeTheta[(rowNum*stride) + colNum] = (pixel)theta; - - edgePic[index] = gradientMagnitude >= threshold ? 
whitePixel : blackPixel; - } - } - } + computeEdge(edgePic, refPic, edgeTheta, stride, height, width); } //Find the angle of a block by averaging the pixel angles @@ -1471,7 +1428,7 @@ if (m_lastNonB && !m_param->rc.bStatRead && ((m_param->bFrameAdaptive && m_param->bframes) || - m_param->rc.cuTree || m_param->scenecutThreshold || + m_param->rc.cuTree || m_param->scenecutThreshold || m_param->bHistbasedScenecut || (m_param->lookaheadDepth && m_param->rc.vbvBufferSize))) { slicetypeAnalyse(frames, false); @@ -1962,10 +1919,15 @@ int numBFrames = 0; int numAnalyzed = numFrames; - bool isScenecut = scenecut(frames, 0, 1, true, origNumFrames); + bool isScenecut = false; /* When scenecut threshold is set, use scenecut detection for I frame placements */ - if (m_param->scenecutThreshold && isScenecut) + if (m_param->scenecutThreshold) + isScenecut = scenecut(frames, 0, 1, true, origNumFrames); + else if (m_param->bHistbasedScenecut) + isScenecut = frames[1]->bScenecut; + + if (isScenecut) { frames[1]->sliceType = X265_TYPE_I; return; @@ -1976,14 +1938,24 @@ m_extendGopBoundary = false; for (int i = m_param->bframes + 1; i < origNumFrames; i += m_param->bframes + 1) { - scenecut(frames, i, i + 1, true, origNumFrames); + if (m_param->scenecutThreshold) + scenecut(frames, i, i + 1, true, origNumFrames); + for (int j = i + 1; j <= X265_MIN(i + m_param->bframes + 1, origNumFrames); j++) { - if (frames[j]->bScenecut && scenecutInternal(frames, j - 1, j, true) ) - { - m_extendGopBoundary = true; - break; - } + if (m_param->scenecutThreshold) + { + if (frames[j]->bScenecut && scenecutInternal(frames, j - 1, j, true)) + { + m_extendGopBoundary = true; + break; + } + } + else if(m_param->bHistbasedScenecut && frames[j]->bScenecut) + { + m_extendGopBoundary = true; + break; + } } if (m_extendGopBoundary) break; @@ -2088,13 +2060,23 @@ { for (int j = 1; j < numBFrames + 1; j++) { - if (scenecut(frames, j, j + 1, false, origNumFrames) || - (bForceRADL && (frames[j]->frameNum == 
preRADL))) - { - frames[j]->sliceType = X265_TYPE_P; - numAnalyzed = j; - break; + if (m_param->bHistbasedScenecut) { + if (frames[j]->bScenecut || (bForceRADL && (frames[j]->frameNum == preRADL))) + { + frames[j]->sliceType = X265_TYPE_P; + numAnalyzed = j; + break; + } } + else if (m_param->scenecutThreshold){ + if ( scenecut(frames, j, j + 1, false, origNumFrames) || (bForceRADL && (frames[j]->frameNum == preRADL)) ) + { + frames[j]->sliceType = X265_TYPE_P; + numAnalyzed = j; + break; + } + } + } } resetStart = bKeyframe ? 1 : X265_MIN(numBFrames + 2, numAnalyzed + 1); diff -r 37648fca915b -r deaecadc4306 source/encoder/slicetype.h --- a/source/encoder/slicetype.h Fri Oct 11 12:45:52 2019 +0530 +++ b/source/encoder/slicetype.h Tue Oct 15 12:16:17 2019 +0530 @@ -43,6 +43,14 @@ #define AQ_EDGE_BIAS 0.5 #define EDGE_INCLINATION 45 +#ifdef HIGH_BIT_DEPTH +#define edge_threshold 1023.0 +#define whitePixel 1023.0 +#else +#define edge_threshold 255.0 +#define whitePixel 255.0 /* value written for edge pixels in non-HIGH_BIT_DEPTH builds */ +#endif + /* Thread local data for lookahead tasks */ struct LookaheadTLD { diff -r 37648fca915b -r deaecadc4306 source/test/regression-tests.txt --- a/source/test/regression-tests.txt Fri Oct 11 12:45:52 2019 +0530 +++ b/source/test/regression-tests.txt Tue Oct 15 12:16:17 2019 +0530 @@ -157,6 +157,9 @@ ducks_take_off_420_720p50.y4m,--preset medium --aq-mode 4 --crf 22 --no-cutree ducks_take_off_420_1_720p50.y4m,--preset medium --selective-sao 4 --sao --crf 20 Traffic_4096x2048_30p.y4m, --preset medium --frame-dup --dup-threshold 60 --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000 +sintel_trailer_2k_1920x1080_24.yuv, --preset medium --hist-scenecut --hist-threshold 0.01 +Traffic_4096x2048_30p.y4m, --preset medium --frame-dup --dup-threshold 60 --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000 --hist-scenecut --hist-threshold 0.01 +sintel_trailer_2k_1920x1080_24.yuv, --preset medium --scenecut 40 --scenecut-bias 20 # Main12 intraCost overflow bug test
720p50_parkrun_ter.y4m,--preset medium diff -r 37648fca915b -r deaecadc4306 source/x265.h --- a/source/x265.h Fri Oct 11 12:45:52 2019 +0530 +++ b/source/x265.h Tue Oct 15 12:16:17 2019 +0530 @@ -210,7 +210,9 @@ uint32_t numCUsInFrame; uint32_t numPartitions; uint32_t depthBytes; - int bScenecut; + bool bScenecut; + double edgeSadValue; + double chromaSadValue; x265_weight_param* wt; x265_analysis_inter_data* interData; x265_analysis_intra_data* intraData; @@ -291,6 +293,9 @@ char sliceType; int bScenecut; double ipCostRatio; + double yedgeSadValue; + double chromaSadValue; + int frameLatency; x265_cu_stats cuStats; x265_pu_stats puStats; @@ -465,6 +470,9 @@ //Dolby Vision RPU metadata x265_dolby_vision_rpu rpu; + //Flag to determine the latest frame in the buffer + bool bufUpdated; + int fieldNum; //SEI picture structure message @@ -1017,8 +1025,9 @@ * decisions. Default is 0 - disabled. 1 is the same as 0. Max 16 */ int lookaheadSlices; - /* An arbitrary threshold which determines how aggressively the lookahead - * should detect scene cuts. The default (40) is recommended. */ + /* An arbitrary threshold which determines how aggressively the lookahead + * should detect scene cuts. The default (40) is recommended. + * Used for encoding cost based scenecut detection */ int scenecutThreshold; /* Replace keyframes by using a column of intra blocks that move across the video @@ -1803,6 +1812,7 @@ /*Emit content light level info SEI*/ int bEmitCLL; + /* * Signals picture structure SEI timing message for every frame @@ -1819,6 +1829,17 @@ /*Input sequence bit depth. 
It can be either 8bit, 10bit or 12bit.*/ int sourceBitDepth; + + /* A genuine threshold which determines whether a frame is a scenecut or not + * when compared against edge and color sad values of a frame's histograms. Default 0.01 + * Range: real number in range (0,2) + * Used for histogram based scene cut detection */ + double edgeTransitionThreshold; + + /*enables improved scenecut detection algorithm to detect scenecuts for slice type + decision and rate control */ + bool bHistbasedScenecut; + } x265_param; /* x265_param_alloc: * Allocates an x265_param instance. The returned param structure is not diff -r 37648fca915b -r deaecadc4306 source/x265cli.h --- a/source/x265cli.h Fri Oct 11 12:45:52 2019 +0530 +++ b/source/x265cli.h Tue Oct 15 12:16:17 2019 +0530 @@ -129,6 +129,9 @@ { "scenecut", required_argument, NULL, 0 }, { "no-scenecut", no_argument, NULL, 0 }, { "scenecut-bias", required_argument, NULL, 0 }, + { "hist-scenecut", no_argument, NULL, 0}, + { "no-hist-scenecut", no_argument, NULL, 0}, + { "hist-threshold", required_argument, NULL, 0}, { "fades", no_argument, NULL, 0 }, { "no-fades", no_argument, NULL, 0 }, { "radl", required_argument, NULL, 0 }, @@ -485,7 +488,10 @@ H0(" --gop-lookahead <integer> Extends gop boundary if a scenecut is found within this from keyint boundary. Default 0\n"); H0(" --no-scenecut Disable adaptive I-frame decision\n"); H0(" --scenecut <integer> How aggressively to insert extra I-frames. Default %d\n", param->scenecutThreshold); - H1(" --scenecut-bias <0..100.0> Bias for scenecut detection. Default %.2f\n", param->scenecutBias); + H0(" --hist-scenecut ..... Enables improved scene-cut detection using histogram based algorithm.\n"); + H0(" --no-hist-scenecut Disables improved scene-cut detection using histogram based algorithm.\n"); + H0(" --scenecut-bias <0..100.0> Bias for scenecut detection.
Default %.2f\n", param->scenecutBias); + H0(" --hist-threshold <0.0..2.0> Threshold for histogram based scenecut detection Default %.2f\n", param->edgeTransitionThreshold); H0(" --[no-]fades Enable detection and handling of fade-in regions. Default %s\n", OPT(param->bEnableFades)); H0(" --radl <integer> Number of RADL pictures allowed in front of IDR. Default %d\n", param->radl); H0(" --intra-refresh Use Periodic Intra Refresh instead of IDR frames\n");
# HG changeset patch # User gopi Satykrishna Akisetty # Date 1571121977 -19800 # Tue Oct 15 12:16:17 2019 +0530 # Node ID deaecadc43060ba37a85d9724a1a306a86433432 # Parent 37648fca915b389bafe923d8443818359e80ebf2 Improved scenecut detection This patch does the following. 1. identifies scenecuts using sad of edge and chroma histogram based thresholding. 2. Add option "--hist-scenecut" and "--hist-threshold' to enable improved scenecut method for slice type decisions,rate control and a threshold for determining scene-cuts. 3. Removed duplicate edgefilter code and created a global function for use in scene cut detection and aq in Lookahead. diff -r 37648fca915b -r deaecadc4306 doc/reST/cli.rst --- a/doc/reST/cli.rst Fri Oct 11 12:45:52 2019 +0530 +++ b/doc/reST/cli.rst Tue Oct 15 12:16:17 2019 +0530 @@ -1427,6 +1427,18 @@ intra cost of a frame used in scenecut detection. For example, a value of 5 indicates, if the inter cost of a frame is greater than or equal to 95 percent of the intra cost of the frame, then detect this frame as scenecut. Values between 5 and 15 are recommended. Default 5. + +.. option:: --hist-scenecut, --no-hist-scenecut + + Indicates that I-frames need to be inserted using edge and color histogram based scenecut algorithm. + :option:`--hist-scenecut` enables adaptive I frame placement using this method and disables the default scene cut algorithm. + :option:`--no-hist-scenecut` disables adaptive I frame placement using this method. + +.. option:: --hist-threshold <0.0..2.0> + + This value represents the threshold for SAD for edge histograms used in scenecut detection. This requires hist-scenecut to be enabled. + For example, a value of 0.2 indicates that a frame with SAD value greater than 0.2 against the previous frame is detected as a scenecut. + Values between 0.0 and 2.0 are recommended. Default 0.01. ..
option:: --radl <integer> diff -r 37648fca915b -r deaecadc4306 source/CMakeLists.txt --- a/source/CMakeLists.txt Fri Oct 11 12:45:52 2019 +0530 +++ b/source/CMakeLists.txt Tue Oct 15 12:16:17 2019 +0530 @@ -29,7 +29,7 @@ option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF) mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD) # X265_BUILD must be incremented each time the public API is changed -set(X265_BUILD 180) +set(X265_BUILD 181) configure_file("${PROJECT_SOURCE_DIR}/x265.def.in" "${PROJECT_BINARY_DIR}/x265.def") configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in" diff -r 37648fca915b -r deaecadc4306 source/common/CMakeLists.txt --- a/source/common/CMakeLists.txt Fri Oct 11 12:45:52 2019 +0530 +++ b/source/common/CMakeLists.txt Tue Oct 15 12:16:17 2019 +0530 @@ -151,4 +151,5 @@ predict.cpp predict.h scalinglist.cpp scalinglist.h quant.cpp quant.h contexts.h - deblock.cpp deblock.h) + deblock.cpp deblock.h + scenecut.h scenecut.cpp) diff -r 37648fca915b -r deaecadc4306 source/common/common.h --- a/source/common/common.h Fri Oct 11 12:45:52 2019 +0530 +++ b/source/common/common.h Tue Oct 15 12:16:17 2019 +0530 @@ -129,14 +129,20 @@ typedef uint64_t sum2_t; typedef uint64_t pixel4; typedef int64_t ssum2_t; +#define HISTOGRAM_SIZE 1024 +#define SHIFT 1 #else typedef uint8_t pixel; typedef uint16_t sum_t; typedef uint32_t sum2_t; typedef uint32_t pixel4; typedef int32_t ssum2_t; // Signed sum +#define HISTOGRAM_SIZE 256 +#define SHIFT 0 #endif // if HIGH_BIT_DEPTH +#define PI 3.14159265 + #if X265_DEPTH < 10 typedef uint32_t sse_t; #else diff -r 37648fca915b -r deaecadc4306 source/common/param.cpp --- a/source/common/param.cpp Fri Oct 11 12:45:52 2019 +0530 +++ b/source/common/param.cpp Tue Oct 15 12:16:17 2019 +0530 @@ -167,6 +167,8 @@ param->bFrameAdaptive = X265_B_ADAPT_TRELLIS; param->bBPyramid = 1; param->scenecutThreshold = 40; /* Magic number pulled in from x264 */ + param->edgeTransitionThreshold = 0.01; + 
param->bHistbasedScenecut = false; param->lookaheadSlices = 8; param->lookaheadThreads = 0; param->scenecutBias = 5.0; @@ -567,6 +569,7 @@ param->bframes = 0; param->lookaheadDepth = 0; param->scenecutThreshold = 0; + param->bHistbasedScenecut = false; param->rc.cuTree = 0; param->frameNumThreads = 1; } @@ -609,7 +612,7 @@ return 0; } -static int x265_atobool(const char* str, bool& bError) +static bool x265_atobool(const char* str, bool& bError) { if (!strcmp(str, "1") || !strcmp(str, "true") || @@ -920,6 +923,7 @@ { bError = false; p->scenecutThreshold = atoi(value); + p->bHistbasedScenecut = false; } } OPT("temporal-layers") p->bEnableTemporalSubLayers = atobool(value); @@ -1186,6 +1190,31 @@ OPT("opt-ref-list-length-pps") p->bOptRefListLengthPPS = atobool(value); OPT("multi-pass-opt-rps") p->bMultiPassOptRPS = atobool(value); OPT("scenecut-bias") p->scenecutBias = atof(value); + OPT("hist-scenecut") + { + p->bHistbasedScenecut = atobool(value); + + if (bError) + { + bError = false; + p->bHistbasedScenecut = false; + } + + if (p->bHistbasedScenecut) { + bError = false; + p->scenecutThreshold = 0; + } + + } + OPT("hist-threshold") { + p->edgeTransitionThreshold = atof(value); + if (bError) + { + bError = false; + p->edgeTransitionThreshold = 0.01; + x265_log(p, X265_LOG_INFO, "using default threshold %.2lf for scene cut detection\n", p->edgeTransitionThreshold); + } + } OPT("lookahead-threads") p->lookaheadThreads = atoi(value); OPT("opt-cu-delta-qp") p->bOptCUDeltaQP = atobool(value); OPT("multi-pass-opt-analysis") p->analysisMultiPassRefine = atobool(value); @@ -1623,8 +1652,14 @@ "Valid Logging level -1:none 0:error 1:warning 2:info 3:debug 4:full"); CHECK(param->scenecutThreshold < 0, "scenecutThreshold must be greater than 0"); - CHECK(param->scenecutBias < 0 || 100 < param->scenecutBias, - "scenecut-bias must be between 0 and 100"); + if (param->scenecutThreshold) { + CHECK(param->scenecutBias < 0 || 100 < param->scenecutBias, + "scenecut-bias must be 
between 0 and 100"); + } + else if (param->bHistbasedScenecut) { + CHECK(param->edgeTransitionThreshold < 0.0 || 2.0 < param->edgeTransitionThreshold, + "hist-threshold must be between 0.0 and 2.0"); + } CHECK(param->radl < 0 || param->radl > param->bframes, "radl must be between 0 and bframes"); CHECK(param->rdPenalty < 0 || param->rdPenalty > 2, @@ -1780,10 +1815,20 @@ x265_log(param, X265_LOG_INFO, "ME / range / subpel / merge : %s / %d / %d / %d\n", x265_motion_est_names[param->searchMethod], param->searchRange, param->subpelRefine, param->maxNumMergeCand); - if (param->keyframeMax != INT_MAX || param->scenecutThreshold) - x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut / bias: %d / %d / %d / %.2lf\n", param->keyframeMin, param->keyframeMax, param->scenecutThreshold, param->scenecutBias * 100); + if (param->scenecutThreshold && param->keyframeMax != INT_MAX) + param->edgeTransitionThreshold = 0.0; + else if (param->bHistbasedScenecut && param->keyframeMax != INT_MAX) + param->scenecutBias = 0.0; + else if (param->keyframeMax != INT_MAX) { + param->edgeTransitionThreshold = 0.0; + param->scenecutBias = 0.0; + } + + if (param->keyframeMax == INT_MAX) + x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut : disabled\n"); else - x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut : disabled\n"); + x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut / bias / threshold : %d / %d / %d / %.2lf / %.2lf\n", param->keyframeMin, param->keyframeMax, ( param->bHistbasedScenecut || param->scenecutThreshold ), param->scenecutBias * 100, param->edgeTransitionThreshold); + if (param->cbQpOffset || param->crQpOffset) x265_log(param, X265_LOG_INFO, "Cb/Cr QP Offset : %d / %d\n", param->cbQpOffset, param->crQpOffset); @@ -1949,6 +1994,8 @@ s += sprintf(s, " rc-lookahead=%d", p->lookaheadDepth); s += sprintf(s, " lookahead-slices=%d", p->lookaheadSlices); s += sprintf(s, " scenecut=%d", p->scenecutThreshold); + s += sprintf(s, " 
hist-scenecut=%d", p->bHistbasedScenecut); + s += sprintf(s, " hist-threshold=%.2f", p->edgeTransitionThreshold); s += sprintf(s, " radl=%d", p->radl); BOOL(p->bEnableHRDConcatFlag, "splice"); BOOL(p->bIntraRefresh, "intra-refresh"); @@ -2096,6 +2143,8 @@ BOOL(p->bOptRefListLengthPPS, "opt-ref-list-length-pps"); BOOL(p->bMultiPassOptRPS, "multi-pass-opt-rps"); s += sprintf(s, " scenecut-bias=%.2f", p->scenecutBias); + s += sprintf(s, " hist-threshold=%.2f", p->edgeTransitionThreshold); + BOOL(p->bOptCUDeltaQP, "opt-cu-delta-qp"); BOOL(p->bAQMotion, "aq-motion"); BOOL(p->bEmitHDRSEI, "hdr"); @@ -2246,6 +2295,7 @@ dst->lookaheadSlices = src->lookaheadSlices; dst->lookaheadThreads = src->lookaheadThreads; dst->scenecutThreshold = src->scenecutThreshold; + dst->bHistbasedScenecut = src->bHistbasedScenecut; dst->bIntraRefresh = src->bIntraRefresh; dst->maxCUSize = src->maxCUSize; dst->minCUSize = src->minCUSize; @@ -2403,6 +2453,7 @@ dst->bOptRefListLengthPPS = src->bOptRefListLengthPPS; dst->bMultiPassOptRPS = src->bMultiPassOptRPS; dst->scenecutBias = src->scenecutBias; + dst->edgeTransitionThreshold = src->edgeTransitionThreshold; dst->gopLookahead = src->lookaheadDepth; dst->bOptCUDeltaQP = src->bOptCUDeltaQP; dst->analysisMultiPassDistortion = src->analysisMultiPassDistortion; diff -r 37648fca915b -r deaecadc4306 source/common/scenecut.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/source/common/scenecut.cpp Tue Oct 15 12:16:17 2019 +0530 @@ -0,0 +1,674 @@ +#include <iostream> +#include <sstream> +#include <vector> +#include <algorithm> +#include <math.h> +#include "encoder.h" +#include "scenecut.h" +#include "slicetype.h" + +using namespace std; +using namespace X265_NS; + +#define EDGE_PLANE_COUNT 1 + +namespace X265_NS { + +void computeEdge(pixel * edgePic, pixel *refPic, pixel * edgeTheta, intptr_t stride, int height, int width) + { + float gradientH = 0, gradientV = 0, radians = 0, theta = 0; + float gradientMagnitude = 0; + pixel blackPixel = 0; + + 
//Applying Sobel filter + for (int rowNum = 0; rowNum < height; rowNum++) + { + for (int colNum = 0; colNum < width; colNum++) + { + edgeTheta[(rowNum*stride) + colNum] = 0; + if ((rowNum != 0) && (colNum != 0) && (rowNum != height - 1) && (colNum != width - 1)) //Ignoring the border pixels of the picture + { + /*Horizontal and vertical gradients + [ -3 0 3 ] [-3 -10 -3 ] + gH = [ -10 0 10] gV = [ 0 0 0 ] + [ -3 0 3 ] [ 3 10 3 ]*/ + + const intptr_t rowOne = (rowNum - 1)*stride, colOne = colNum - 1; + const intptr_t rowTwo = rowNum * stride, colTwo = colNum; + const intptr_t rowThree = (rowNum + 1)*stride, colThree = colNum + 1; + const intptr_t index = (rowNum*stride) + colNum; + + gradientH = (float)(-3 * refPic[rowOne + colOne] + 3 * refPic[rowOne + colThree] - 10 * refPic[rowTwo + colOne] + 10 * refPic[rowTwo + colThree] - 3 * refPic[rowThree + colOne] + 3 * refPic[rowThree + colThree]); + gradientV = (float)(-3 * refPic[rowOne + colOne] - 10 * refPic[rowOne + colTwo] - 3 * refPic[rowOne + colThree] + 3 * refPic[rowThree + colOne] + 10 * refPic[rowThree + colTwo] + 3 * refPic[rowThree + colThree]); + + gradientMagnitude = sqrtf(gradientH * gradientH + gradientV * gradientV); + radians = atan2(gradientV, gradientH); + theta = (float)((radians * 180) / PI); + if (theta < 0) + theta = 180 + theta; + edgeTheta[(rowNum*stride) + colNum] = (pixel)theta; + + edgePic[index] = (pixel)(gradientMagnitude >= edge_threshold ? 
whitePixel : blackPixel); + } + } + } + } + + Histogram::Histogram() + { + memset(frequency_distribution, 0, HISTOGRAM_SIZE * sizeof(int32_t)); + } + + Histogram::Histogram(Histogram const& hist) { + memset(frequency_distribution, 0, HISTOGRAM_SIZE * sizeof(int32_t)); + for (int i = 0; i < HISTOGRAM_SIZE; i++) { + frequency_distribution[i] = hist.frequency_distribution[i]; + } + } + + Histogram & Histogram::operator=(Histogram const& hist) + { + memset(frequency_distribution, 0, HISTOGRAM_SIZE * sizeof(int32_t)); + memcpy(frequency_distribution, hist.frequency_distribution, sizeof(int32_t)*HISTOGRAM_SIZE); + return *this; + } + + Histogram::~Histogram() {} + + YuvHistogram::YuvHistogram() {} + + void YuvHistogram::initHistograms(int32_t planecount) { + this->plane_count = planecount; + m_isalloc = false; + bisUpdated = false; + param = NULL; + + yuv_hist = edge_hist = NULL; + plane_sizes = plane_heights = plane_widths = NULL; + edgePic = edgeThetaPic = NULL; + + plane_sizes = X265_MALLOC(int32_t, plane_count); + plane_heights = X265_MALLOC(int32_t, plane_count); + plane_widths = X265_MALLOC(int32_t, plane_count); + + if (!plane_sizes || !plane_heights || !plane_widths) { + x265_log(param, X265_LOG_ERROR, "unable to allocate memory for plane dimensions\n"); + m_isalloc &= false; + } + else { + memset(plane_sizes, 0, plane_count * sizeof(int32_t)); + memset(plane_heights, 0, plane_count * sizeof(int32_t)); + memset(plane_widths, 0, plane_count * sizeof(int32_t)); + m_isalloc &= true; + } + + yuv_hist = X265_MALLOC(Histogram, plane_count); + edge_hist = X265_MALLOC(Histogram, plane_count); + + if (!yuv_hist || !edge_hist) { + m_isalloc &= false; + x265_log(param, X265_LOG_ERROR, "unable to allocate memory for histograms\n"); + } + + } + + void YuvHistogram::initHistograms(x265_param *p) { + param = p; + plane_count = x265_cli_csps[param->internalCsp].planes; + bisUpdated = false; + m_isalloc = false; + + yuv_hist = edge_hist = NULL; + plane_sizes = plane_heights = 
plane_widths = NULL; + edgePic = edgeThetaPic = NULL; + + plane_sizes = X265_MALLOC(int32_t, plane_count); + plane_heights = X265_MALLOC(int32_t, plane_count); + plane_widths = X265_MALLOC(int32_t, plane_count); + + if (!plane_sizes || !plane_heights || !plane_widths) { + x265_log(param, X265_LOG_ERROR, "unable to allocate memory for plane dimensions\n"); + m_isalloc &= false; + } + else { + memset(plane_sizes, 0, plane_count * sizeof(int32_t)); + memset(plane_heights, 0, plane_count * sizeof(int32_t)); + memset(plane_widths, 0, plane_count * sizeof(int32_t)); + m_isalloc &= true; + } + + yuv_hist = X265_MALLOC(Histogram, plane_count); + edge_hist = X265_MALLOC(Histogram, plane_count); + + if (!yuv_hist || !edge_hist) { + m_isalloc &= false; + x265_log(param, X265_LOG_ERROR, "unable to allocate memory for histograms\n"); + } + + } + + bool YuvHistogram::allocHistogramBuffers() { + //allocate memory for edge filter output and histograms + bool isalloc = true; + + edgePic = X265_MALLOC(pixel*, plane_count); + edgeThetaPic = X265_MALLOC(pixel*, plane_count); + + if (!edgePic || !edgeThetaPic) { + isalloc &= false; + x265_log(param, X265_LOG_ERROR, "unable to allocate memory for edge buffers\n"); + return isalloc; + } + + for (int i = 0; i < plane_count; i++) { + + edgePic[i] = edgeThetaPic[i] = NULL; + edgePic[i] = X265_MALLOC(pixel, plane_sizes[i]); + edgeThetaPic[i] = X265_MALLOC(pixel, plane_sizes[i]); + + if (edgePic[i] && edgeThetaPic[i]) { + memset(edgePic[i], 0, plane_sizes[i] * sizeof(pixel)); + memset(edgeThetaPic[i], 0, plane_sizes[i] * sizeof(pixel)); + isalloc &= true; + } + else + isalloc &= false; + } + return isalloc; + } + + YuvHistogram::~YuvHistogram() + { + freeHistogramBuffers(); //change implementation based on allocation changes + } + + YuvHistogram::YuvHistogram(YuvHistogram const& hist) { + initHistograms(hist.plane_count); /* a freshly constructed object has indeterminate members: allocate the plane and histogram arrays and clear m_isalloc before the copies below read or write them */ + maxuv_hist = hist.maxuv_hist; + plane_count = hist.plane_count; + bisUpdated = hist.bisUpdated; + param = hist.param; + memcpy(plane_sizes, 
hist.plane_sizes, plane_count * sizeof(int32_t)); + memcpy(plane_heights, hist.plane_heights, plane_count * sizeof(int32_t)); + memcpy(plane_widths, hist.plane_widths, plane_count * sizeof(int32_t)); + memcpy(yuv_hist, hist.yuv_hist, plane_count * sizeof(Histogram)); + memcpy(edge_hist, hist.edge_hist, plane_count * sizeof(Histogram)); + + if (!m_isalloc) { + m_isalloc = false; + m_isalloc = allocHistogramBuffers(); + } + + if (m_isalloc) { + for (int i = 0; i < plane_count; i++) { + if (edgePic[i] && edgeThetaPic[i]) { + memcpy(edgePic[i], hist.edgePic[i], plane_sizes[i] * sizeof(pixel)); + memcpy(edgeThetaPic[i], hist.edgeThetaPic[i], plane_sizes[i] * sizeof(pixel)); + } + } + } + + } + + YuvHistogram & YuvHistogram ::operator=(const YuvHistogram & copy_hist) + { + maxuv_hist = copy_hist.maxuv_hist; + plane_count = copy_hist.plane_count; + bisUpdated = copy_hist.bisUpdated; + param = copy_hist.param; + memcpy(plane_sizes, copy_hist.plane_sizes, plane_count * sizeof(int32_t)); + memcpy(plane_heights, copy_hist.plane_heights, plane_count * sizeof(int32_t)); + memcpy(plane_widths, copy_hist.plane_widths, plane_count * sizeof(int32_t)); + memcpy(yuv_hist, copy_hist.yuv_hist, plane_count * sizeof(Histogram)); + memcpy(edge_hist, copy_hist.edge_hist, plane_count * sizeof(Histogram)); + + if (!m_isalloc) { + m_isalloc = false; + m_isalloc = allocHistogramBuffers(); + } + + if (m_isalloc) { + for (int i = 0; i < plane_count; i++) { + if (edgePic[i] && edgeThetaPic[i]) { + memcpy(edgePic[i], copy_hist.edgePic[i], plane_sizes[i] * sizeof(pixel)); + memcpy(edgeThetaPic[i], copy_hist.edgeThetaPic[i], plane_sizes[i] * sizeof(pixel)); + } + } + } + + return *this; + } + + void YuvHistogram::initFrameDimensions(x265_picture & pic) { + + for (int i = 0; i < plane_count; i++) { + plane_widths[i] = pic.width; + plane_heights[i] = pic.height >> x265_cli_csps[pic.colorSpace].height[i]; + plane_sizes[i] = plane_widths[i] * plane_heights[i]; + } + } + + void 
YuvHistogram::freeHistogramBuffers() { + //de allocate memory for histograms and edge filtered output + if (edgePic && edgeThetaPic) { + for (int i = 0; i < plane_count; i++) { + if (edgePic[i] && edgeThetaPic[i]) { + X265_FREE_ZERO(edgePic[i]); + X265_FREE_ZERO(edgeThetaPic[i]); + } + } + X265_FREE_ZERO(edgePic); + X265_FREE_ZERO(edgeThetaPic); + } + + if (plane_sizes && plane_heights && plane_widths) { + X265_FREE_ZERO(plane_sizes); + X265_FREE_ZERO(plane_heights); + X265_FREE_ZERO(plane_widths); + } + + if (yuv_hist && edge_hist) { + X265_FREE_ZERO(yuv_hist); + X265_FREE_ZERO(edge_hist); + } + } + + bool YuvHistogram::edgeFilter(x265_picture *frame) { + + if (!m_isalloc) { + initFrameDimensions(*frame); + m_isalloc = allocHistogramBuffers(); + } + + if (m_isalloc) { + for (int idx = 0; idx < EDGE_PLANE_COUNT; idx++) { + + memset(edgePic[idx], 0, sizeof(pixel) * plane_sizes[idx]); + memset(edgeThetaPic[idx], 0, sizeof(pixel) * plane_sizes[idx]); + + pixel *src = (pixel*)frame->planes[idx]; + pixel *edge_pic = edgePic[idx]; + pixel *ref_pic = src; + pixel *edge_theta = edgeThetaPic[idx]; + + assert(edge_pic != NULL); + assert(ref_pic != NULL); + memcpy(edge_pic, src, plane_sizes[idx] * sizeof(pixel)); + /* ref_pic aliases src; copying a buffer onto itself with memcpy is undefined, so nothing is copied here */ + + computeEdge(edge_pic, ref_pic, edge_theta,plane_widths[idx], plane_heights[idx], plane_widths[idx]); + } + return true; + } + else { + return false; + } + } + + bool YuvHistogram::computeHistograms(x265_picture &cur_frame) { + + bool bsuccess = false; + bsuccess = computeLumaEdgeHistogram(cur_frame); + if (bsuccess) { + if (plane_count > 1) { + bsuccess &= computeChromaHistogram(cur_frame); + } + return bsuccess; + } + else { + return bsuccess; + } + + } + + bool YuvHistogram::computeLumaEdgeHistogram(x265_picture &frame) { + + pixel pixel_val = 0; + + memset(edge_hist[0].frequency_distribution, 0, HISTOGRAM_SIZE * sizeof(int32_t)); + + int size = frame.height*(frame.stride[0] >> SHIFT); + if (size > plane_sizes[0]) size = plane_sizes[0]; /* edgePic[0] is allocated with plane_sizes[0] pixels; never read past it when the stride is wider than the picture */ + for (int i = 
0; i < size; i++) { + pixel_val = edgePic[0][i]; + edge_hist[0].frequency_distribution[pixel_val]++; + } + return true; + } + + bool YuvHistogram::computeChromaHistogram(x265_picture &frame) { + /*u hist calculation*/ + pixel pixel_val = 0; + int32_t pixel_ucount = 0, pixel_vcount = 0; + + int u_height = (frame.height >> x265_cli_csps[frame.colorSpace].height[1]); + int size = u_height * (frame.stride[1] >> SHIFT); + memset(yuv_hist[1].frequency_distribution, 0, HISTOGRAM_SIZE * sizeof(int32_t)); + + for (int i = 0; i < size; i++) { + pixel_val = *((pixel *)frame.planes[1] + i); + yuv_hist[1].frequency_distribution[pixel_val]++; + pixel_ucount++; + } + + /*v hist calculation for independent uv planes */ + + if (plane_count == 3) { + pixel_val = 0; + int v_height = (frame.height >> x265_cli_csps[frame.colorSpace].height[2]); + size = v_height * (frame.stride[2] >> SHIFT); + memset(yuv_hist[2].frequency_distribution, 0, HISTOGRAM_SIZE * sizeof(int32_t)); + + for (int i = 0; i < size; i++) { + pixel_val = *((pixel *)frame.planes[2] + i); + yuv_hist[2].frequency_distribution[pixel_val]++; + pixel_vcount++; + } + + for (int i = 0; i < HISTOGRAM_SIZE; i++) { + maxuv_hist.frequency_distribution[i] = max(yuv_hist[1].frequency_distribution[i], yuv_hist[2].frequency_distribution[i]); + } + + } + else { + maxuv_hist = yuv_hist[1]; //for two planes scenario + } + + return true; + } + + bool YuvHistogram::isUpdated() { + return bisUpdated; + } + + void YuvHistogram::setUpdateFlag(bool flag) { + bisUpdated = flag; + } + + bool YuvHistogram::getUpdateFlag() { + return bisUpdated; + } + + SadYuv::SadYuv() { } + + void SadYuv::initSadYuv(int planecount) { + this->plane_count = planecount; + sad_yuv = NULL; + psad_yuv = NULL; + sad_yuv = X265_MALLOC(int32_t, plane_count); + psad_yuv = X265_MALLOC(double, plane_count); + if (sad_yuv && psad_yuv) { + memset(sad_yuv, 0, plane_count * sizeof(int32_t)); + memset(psad_yuv, 0, sizeof(double) * plane_count); + } + } + + SadYuv & 
SadYuv::operator=(SadYuv const& sad_val) { + this->plane_count = sad_val.plane_count; + if (!sad_yuv && !psad_yuv) { + sad_yuv = NULL; + psad_yuv = NULL; + sad_yuv = X265_MALLOC(int32_t, plane_count); + psad_yuv = X265_MALLOC(double, plane_count); + if (sad_yuv && psad_yuv) { + memcpy(sad_yuv, sad_val.sad_yuv, plane_count * sizeof(int32_t)); + memcpy(psad_yuv, sad_val.psad_yuv, sizeof(double) * plane_count); + } + } + else { + if (sad_yuv) + memcpy(sad_yuv, sad_val.sad_yuv, plane_count * sizeof(int32_t)); + if (psad_yuv) + memcpy(psad_yuv, sad_val.psad_yuv, sizeof(double) * plane_count); + } + return *this; + } + + SadYuv::~SadYuv() { + if (sad_yuv && psad_yuv) { + X265_FREE(sad_yuv); + X265_FREE(psad_yuv); + } + } + + int sad_stats::frames_scanned=0; + int sad_stats::line_number=0; + + sad_stats::sad_stats(int planecount, double threshold) { + this->plane_count = planecount; + calculateThresholds(threshold); + allocateBuffers(); + } + + void sad_stats::calculateThresholds(double threshold) { + edge_hist_threshold = threshold; + strength_factor = 2.0; + chroma_hist_threshold = threshold * 10.0; + scaled_edge_threshold = edge_hist_threshold * strength_factor; + scaled_chroma_threshold = chroma_hist_threshold * strength_factor; + } + + void sad_stats::init() { + bscene_cut = NULL; + bdrop_frame = NULL; + sad_vals = NULL; + maxuv_sad_vals = NULL; + edge_sad_vals = NULL; + prev_hist = NULL; + } + + sad_stats::~sad_stats() { + releaseBuffers(); + } + + void sad_stats::allocateBuffers() { + + init(); + sad_vals = new SadYuv[DUP_BUFFER](); + maxuv_sad_vals = new SadYuv[DUP_BUFFER](); + edge_sad_vals = new SadYuv[DUP_BUFFER](); + prev_hist = new YuvHistogram(); + prev_hist->initHistograms(plane_count); + + for (int i = 0; i < DUP_BUFFER; i++) { + sad_vals[i].initSadYuv(plane_count); + maxuv_sad_vals[i].initSadYuv(plane_count); + edge_sad_vals[i].initSadYuv(plane_count); + } + + bscene_cut = new bool[DUP_BUFFER]; + bdrop_frame = new bool[DUP_BUFFER]; + + if (!sad_vals || 
!maxuv_sad_vals || !edge_sad_vals || !bscene_cut || !bdrop_frame) { + x265_log(NULL, X265_LOG_ERROR, "Heap Error !"); + exit(101); + } + else { + memset(bscene_cut, false, DUP_BUFFER * sizeof(bool)); /* clear every entry of the DUP_BUFFER-sized flag arrays rather than a hard-coded 2 */ + memset(bdrop_frame, false, DUP_BUFFER * sizeof(bool)); + } + + } + + void sad_stats::releaseBuffers() { + if (sad_vals && maxuv_sad_vals && edge_sad_vals && bscene_cut && bdrop_frame && prev_hist) { + delete[] sad_vals; + delete[] maxuv_sad_vals; + delete[] edge_sad_vals; + delete[] bscene_cut; + delete[] bdrop_frame; + delete prev_hist; + } + } + + bool sad_stats::computeSadValue(YuvHistogram *input_frames, int32_t* plane_sizes) { + + int32_t *yuv_sad_val = NULL, *edge_sad_val = NULL, *maxuv_sad_val = NULL; + + double *maxuv_normalized_sad = NULL, *yuv_norm_sad = NULL, *edge_normalized_sads = NULL; + + YuvHistogram * ref_hist = NULL, *cur_hist = NULL; + + /*inorder to process frames as per poc's updated by frame duplication */ + if (frames_scanned > 0) { + + if (!input_frames[0].isUpdated() && input_frames[1].isUpdated()) { + ref_hist = prev_hist; + cur_hist = input_frames + 1; + + yuv_sad_val = sad_vals[1].sad_yuv, + edge_sad_val = edge_sad_vals[1].sad_yuv, + maxuv_sad_val = maxuv_sad_vals[1].sad_yuv; + maxuv_normalized_sad = maxuv_sad_vals[1].psad_yuv, + yuv_norm_sad = sad_vals[1].psad_yuv, + edge_normalized_sads = edge_sad_vals[1].psad_yuv; + input_frames[1].setUpdateFlag(false); + + } + else if (input_frames[0].isUpdated() && input_frames[1].isUpdated()) { + ref_hist = prev_hist; + cur_hist = input_frames; + + yuv_sad_val = sad_vals[0].sad_yuv, + edge_sad_val = edge_sad_vals[0].sad_yuv, + maxuv_sad_val = maxuv_sad_vals[0].sad_yuv; + maxuv_normalized_sad = maxuv_sad_vals[0].psad_yuv, + yuv_norm_sad = sad_vals[0].psad_yuv, + edge_normalized_sads = edge_sad_vals[0].psad_yuv; + input_frames[0].setUpdateFlag(false); + + } + else if (input_frames[0].isUpdated() && !input_frames[1].isUpdated()) { + ref_hist = prev_hist; + cur_hist = input_frames; + + yuv_sad_val = sad_vals[0].sad_yuv, + 
edge_sad_val = edge_sad_vals[0].sad_yuv, + maxuv_sad_val = maxuv_sad_vals[0].sad_yuv; + maxuv_normalized_sad = maxuv_sad_vals[0].psad_yuv, + yuv_norm_sad = sad_vals[0].psad_yuv, + edge_normalized_sads = edge_sad_vals[0].psad_yuv; + input_frames[0].setUpdateFlag(false); + } + else { + return true; + } + } + else { + cur_hist = input_frames; + + yuv_sad_val = sad_vals[0].sad_yuv, + edge_sad_val = edge_sad_vals[0].sad_yuv, + maxuv_sad_val = maxuv_sad_vals[0].sad_yuv; + + maxuv_normalized_sad = maxuv_sad_vals[0].psad_yuv, + yuv_norm_sad = sad_vals[0].psad_yuv, + edge_normalized_sads = edge_sad_vals[0].psad_yuv; + input_frames[0].setUpdateFlag(false); + } + + if (frames_scanned == 0) { //first frame is scenecut by default no sad computation for the same. + + maxuv_sad_val[0] = 0; + maxuv_normalized_sad[0] = 0.0; + memset(yuv_sad_val, 0 , plane_count * sizeof(int32_t)); + memset(edge_sad_val, 0, plane_count * sizeof(int32_t)); + memset(edge_normalized_sads, 0, plane_count * sizeof(double)); + memset(yuv_norm_sad, 0, plane_count * sizeof(double)); + + } + else { + int32_t freq_diff[3]; + int32_t maxuv_freq_diff[1]; + int32_t edge_freq_diff[3]; + double color_probability_diff[3], edge_probability_diff[3]; + + memset(yuv_sad_val, 0, plane_count*sizeof(int32_t)); + memset(edge_sad_val, 0, plane_count*sizeof(int32_t)); + + memset(yuv_norm_sad, 0, plane_count * sizeof(double)); + memset(edge_normalized_sads, 0, plane_count * sizeof(double)); + memset(color_probability_diff, 0, plane_count * sizeof(double)); + memset(edge_probability_diff, 0, plane_count * sizeof(double)); + + maxuv_normalized_sad[0] = 0.0; + maxuv_sad_val[0] = 0; + + memset(freq_diff, 0, 3 * sizeof(int32_t)); + memset(maxuv_freq_diff, 0, sizeof(int32_t)); + memset(edge_freq_diff, 0, 3 * sizeof(int32_t)); + + for (int i = 0; i < plane_count; i++) { + { + for (int j = 0; j < HISTOGRAM_SIZE; j++) { + + if (i == 0 && plane_count >= 1) { + maxuv_freq_diff[i] = (abs(cur_hist->maxuv_hist.frequency_distribution[j] - 
ref_hist->maxuv_hist.frequency_distribution[j])); + maxuv_sad_val[i] += maxuv_freq_diff[i]; + maxuv_normalized_sad[i] += (double)maxuv_freq_diff[i] / plane_sizes[i]; + edge_freq_diff[i] = abs(cur_hist->edge_hist[i].frequency_distribution[j] - ref_hist->edge_hist[i].frequency_distribution[j]); + edge_probability_diff[i] = double(edge_freq_diff[i]) / plane_sizes[i]; + edge_sad_val[i] += edge_freq_diff[i]; + edge_normalized_sads[i] += edge_probability_diff[i]; + } + else { + freq_diff[i] = abs(cur_hist->yuv_hist[i].frequency_distribution[j] - ref_hist->yuv_hist[i].frequency_distribution[j]); + color_probability_diff[i] = (double)freq_diff[i] / plane_sizes[i]; + yuv_sad_val[i] += freq_diff[i]; + yuv_norm_sad[i] += color_probability_diff[i]; + } + + } + + } + } + + } + + *prev_hist = *cur_hist; + + frames_scanned++; + + return true; + } + + void sad_stats::findSceneCuts(x265_picture * picList, bool& bdup) { + + if (frames_scanned == 1) { + //for first frame + bscene_cut[0] = true; + bdrop_frame[0] = false; + picList->analysisData.bScenecut = (int)getSceneCutflag(0); + bdup = getDropflag(0); + picList->analysisData.edgeSadValue = edge_sad_vals[0].psad_yuv[0]; + picList->analysisData.chromaSadValue = maxuv_sad_vals[0].psad_yuv[0]; + } + else { + bscene_cut[1] = bdrop_frame[1] = false; + if (edge_sad_vals[1].psad_yuv[0] == 0) { + bdrop_frame[1] = true; + } + else if (edge_sad_vals[1].psad_yuv[0] > edge_hist_threshold || maxuv_sad_vals[1].psad_yuv[0] >= chroma_hist_threshold) { + bscene_cut[1] = true; + bdrop_frame[1] = false; + } + else if (edge_sad_vals[1].psad_yuv[0] > scaled_edge_threshold || maxuv_sad_vals[1].psad_yuv[0] >= scaled_chroma_threshold) { + bscene_cut[1] = true; + bdrop_frame[1] = false; + } + picList->analysisData.bScenecut = (int)getSceneCutflag(1); + bdup = getDropflag(1); + picList->analysisData.edgeSadValue = edge_sad_vals[1].psad_yuv[0]; + picList->analysisData.chromaSadValue = maxuv_sad_vals[1].psad_yuv[0]; + } + } + + bool sad_stats::getDropflag(int 
i) { + return bdrop_frame[i]; + } + + bool sad_stats::getSceneCutflag(int i) { + return bscene_cut[i]; + } + +} \ No newline at end of file diff -r 37648fca915b -r deaecadc4306 source/common/scenecut.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/source/common/scenecut.h Tue Oct 15 12:16:17 2019 +0530 @@ -0,0 +1,147 @@ +#ifndef SCENECUT_H +#define SCENECUT_H + +#include <string> +#include <iostream> +#include <sstream> +#include <vector> +#include <algorithm> +#include <math.h> + +#include "yuv.h" +#include "common.h" + +#if HIGH_BIT_DEPTH /* tested by value, matching the "#if HIGH_BIT_DEPTH" checks in common.h and slicetype.cpp */ +#define edge_threshold 1023.0 +#define whitePixel 1023.0 +#else +#define edge_threshold 255.0 +#define whitePixel 255.0 /* was "#define pixel whitePixel 255.0", which redefined the pixel type and left whitePixel undefined */ +#endif + +using namespace std; + +namespace X265_NS { + + class Histogram { + + public: + int32_t frequency_distribution[HISTOGRAM_SIZE]; + + Histogram(); + + Histogram(Histogram const& hist); + + Histogram & operator=(Histogram const& hist); + + ~Histogram(); + + }; + + class YuvHistogram { + public: + Histogram *yuv_hist; + Histogram *edge_hist; + int32_t *plane_sizes; + int32_t *plane_heights; + int32_t *plane_widths; + + Histogram maxuv_hist; + int32_t plane_count; + bool bisUpdated; + + pixel** edgePic; + pixel** edgeThetaPic; + + x265_param * param; /*for handling various color spaces*/ + bool m_isalloc; + + YuvHistogram(); + + void initHistograms(int32_t plane_count); + + void initHistograms(x265_param *p); + + bool allocHistogramBuffers(); + + YuvHistogram(YuvHistogram const& hist); + + YuvHistogram & operator=(const YuvHistogram & copy_hist); + + ~YuvHistogram(); + + void initFrameDimensions(x265_picture & pic); + + void freeHistogramBuffers(); + + bool edgeFilter(x265_picture *frame); + + bool computeHistograms(x265_picture &cur_frame); + + bool computeLumaEdgeHistogram(x265_picture &frame); + + bool computeChromaHistogram(x265_picture &frame); + + bool isUpdated(); + + void setUpdateFlag(bool flag); + + bool getUpdateFlag(); + + }; + + struct SadYuv { + int32_t *sad_yuv; + double 
*psad_yuv; + int plane_count; + ~SadYuv(); + SadYuv(); + void initSadYuv(int plane_count); + SadYuv & operator=(SadYuv const& sad_val); +}; + + class sad_stats { + + bool *bscene_cut; + bool *bdrop_frame; + SadYuv * sad_vals; + SadYuv * maxuv_sad_vals; + SadYuv * edge_sad_vals; + int plane_count; + static int line_number; + static int frames_scanned; + YuvHistogram *prev_hist; + double edge_hist_threshold; + double chroma_hist_threshold; + double scaled_chroma_threshold; + double scaled_edge_threshold; + double strength_factor; + + public: + sad_stats(int plane_count, double threshold); + + ~sad_stats(); + + void init(); + + void allocateBuffers(); + + void releaseBuffers(); + + void calculateThresholds(double threshold); + + bool computeSadValue(YuvHistogram *frames, int32_t* plane_sizes); + + void findSceneCuts(x265_picture * piclist,bool & bdup); + + bool getDropflag(int i); + + bool getSceneCutflag(int i); + + }; + +void computeEdge(pixel * edgePic, pixel *refPic, pixel * edgeTheta, intptr_t stride, int height, int width); + +} + +#endif diff -r 37648fca915b -r deaecadc4306 source/encoder/api.cpp --- a/source/encoder/api.cpp Fri Oct 11 12:45:52 2019 +0530 +++ b/source/encoder/api.cpp Tue Oct 15 12:16:17 2019 +0530 @@ -31,6 +31,7 @@ #include "nal.h" #include "bitcost.h" #include "svt.h" +#include "scenecut.h" #if ENABLE_LIBVMAF #include "libvmaf.h" @@ -117,7 +118,10 @@ x265_log(param, X265_LOG_INFO, "build info %s\n", PFX(build_info_str)); encoder = new Encoder; - + encoder->m_sad_stats = new sad_stats(x265_cli_csps[p->internalCsp].planes,param->edgeTransitionThreshold); + encoder->m_hist_of_adj_frames = new YuvHistogram[2]; + encoder->m_hist_of_adj_frames[0].initHistograms(p); + encoder->m_hist_of_adj_frames[1].initHistograms(p); #ifdef SVT_HEVC if (param->bEnableSvtHevc) @@ -809,6 +813,7 @@ CHECKED_MALLOC_ZERO(interData->ref, int32_t, analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir); } analysis->interData = interData; + analysis->bScenecut = 
false; return; @@ -924,6 +929,7 @@ pic->rpu.payloadSize = 0; pic->rpu.payload = NULL; pic->picStruct = 0; + pic->bufUpdated = false; if ((param->analysisSave || param->analysisLoad) || (param->bAnalysisType == AVC_INFO)) { @@ -933,7 +939,9 @@ uint32_t numCUsInFrame = widthInCU * heightInCU; pic->analysisData.numCUsInFrame = numCUsInFrame; pic->analysisData.numPartitions = param->num4x4Partitions; + pic->analysisData.bScenecut = false; } + } void x265_picture_free(x265_picture *p) @@ -955,7 +963,8 @@ { if (param && param->rc.zonefileCount) { for (int i = 0; i < param->rc.zonefileCount; i++) - x265_free(param->rc.zones[i].zoneParam); + if(param->rc.zones[i].zoneParam) + x265_free(param->rc.zones[i].zoneParam); } if (param && (param->rc.zoneCount || param->rc.zonefileCount)) x265_free(param->rc.zones); diff -r 37648fca915b -r deaecadc4306 source/encoder/encoder.cpp --- a/source/encoder/encoder.cpp Fri Oct 11 12:45:52 2019 +0530 +++ b/source/encoder/encoder.cpp Tue Oct 15 12:16:17 2019 +0530 @@ -119,6 +119,9 @@ m_frameEncoder[i] = NULL; for (uint32_t i = 0; i < DUP_BUFFER; i++) m_dupBuffer[i] = NULL; + + m_hist_of_adj_frames = NULL; + m_sad_stats = NULL; MotionEstimate::initScales(); #if ENABLE_HDR10_PLUS @@ -162,7 +165,9 @@ int rows = (p->sourceHeight + p->maxCUSize - 1) >> g_log2Size[p->maxCUSize]; int cols = (p->sourceWidth + p->maxCUSize - 1) >> g_log2Size[p->maxCUSize]; - if (m_param->bEnableFrameDuplication) + + + if (m_param->bEnableFrameDuplication || m_param->bHistbasedScenecut) { size_t framesize = 0; int pixelbytes = p->sourceBitDepth > 8 ? 
2 : 1; @@ -184,6 +189,7 @@ m_dupBuffer[i]->dupPlane = NULL; m_dupBuffer[i]->dupPlane = X265_MALLOC(char, framesize); m_dupBuffer[i]->dupPic->planes[0] = m_dupBuffer[i]->dupPlane; + m_dupBuffer[i]->bufUpdated = false; m_dupBuffer[i]->bOccupied = false; m_dupBuffer[i]->bDup = false; } @@ -820,7 +826,7 @@ m_exportedPic = NULL; } - if (m_param->bEnableFrameDuplication) + if (m_param->bEnableFrameDuplication || m_param->bHistbasedScenecut) { for (uint32_t i = 0; i < DUP_BUFFER; i++) { @@ -1280,6 +1286,33 @@ return psnrWeight = (psnrY * 6 + psnrU + psnrV) / 8; } +void Encoder::updateSceneCutAndFrameDuplicateFlags() { + /* SCD computation and drop flag*/ + for (int i = 0; i < DUP_BUFFER; i++) { + if (m_dupBuffer[i]->bufUpdated) { + m_hist_of_adj_frames[i].setUpdateFlag(true); + m_hist_of_adj_frames[i].edgeFilter(m_dupBuffer[i]->dupPic); + m_hist_of_adj_frames[i].computeHistograms(*m_dupBuffer[i]->dupPic); + m_sad_stats->computeSadValue(m_hist_of_adj_frames, m_hist_of_adj_frames->plane_sizes); + m_sad_stats->findSceneCuts(m_dupBuffer[i]->dupPic, m_dupBuffer[i]->bDup); + + if (m_dupBuffer[i]->dupPic->analysisData.bScenecut) { + x265_log(m_param, X265_LOG_DEBUG, "scene cut at %d edge hist sad: %0.4lf maxuv hist sad: %0.4lf\n", + m_dupBuffer[i]->dupPic->poc,m_dupBuffer[i]->dupPic->analysisData.edgeSadValue,m_dupBuffer[i]->dupPic->analysisData.chromaSadValue); + } + + if (m_dupBuffer[1]->bufUpdated) + m_hist_of_adj_frames[0] = m_hist_of_adj_frames[1]; + } + } + + } + +/* TBD +- to be updated for missing parameters in case of re-use else where and improvised to copy constructor / assignment operator of x265 picture data structure. +- benefits avoid function and use language features appropriately. 
+*/ + void Encoder::copyPicture(x265_picture *dest, const x265_picture *src) { dest->poc = src->poc; @@ -1299,6 +1332,25 @@ memcpy(dest->planes[0], src->planes[0], src->framesize * sizeof(char)); dest->planes[1] = (char*)dest->planes[0] + src->stride[0] * src->height; dest->planes[2] = (char*)dest->planes[1] + src->stride[1] * (src->height >> x265_cli_csps[src->colorSpace].height[1]); + memcpy(&dest->analysisData, &src->analysisData, sizeof(src->analysisData)); + +} + +void Encoder::setPictureFlags(int idx) { + m_dupBuffer[idx]->bOccupied = true; + m_dupBuffer[idx]->bufUpdated = true; + m_dupBuffer[idx]->bDup = false; +} + +void Encoder::unsetPictureFlags(int idx) { + if (idx == 1) { + m_dupBuffer[idx]->bOccupied = false; + m_dupBuffer[idx]->bufUpdated = false; + m_dupBuffer[idx]->bDup = false; + } + else if (idx == 0) { + m_dupBuffer[idx]->bufUpdated = false; + } } /** @@ -1327,7 +1379,9 @@ const x265_picture* inputPic = NULL; static int written = 0, read = 0; bool dontRead = false; - + bool isScenecutEnabled = m_param->bHistbasedScenecut; + bool dropflag = false; + if (m_exportedPic) { if (!m_param->bUseAnalysisFile && m_param->analysisSave) @@ -1338,7 +1392,7 @@ } if ((pic_in && (!m_param->chunkEnd || (m_encodedFrameNum < m_param->chunkEnd))) || (m_param->bEnableFrameDuplication && !pic_in && (read < written))) { - if ((m_param->bEnableFrameDuplication && !pic_in && (read < written))) + if ((m_param->bEnableFrameDuplication && !pic_in && (read < written)) || (isScenecutEnabled && !pic_in && (read < written))) dontRead = true; else { @@ -1361,7 +1415,7 @@ } } - if (m_param->bEnableFrameDuplication) + if (m_param->bEnableFrameDuplication || isScenecutEnabled ) { double psnrWeight = 0; @@ -1372,6 +1426,11 @@ copyPicture(m_dupBuffer[0]->dupPic, pic_in); m_dupBuffer[0]->bOccupied = true; written++; + if (m_param->bHistbasedScenecut) { + setPictureFlags(0); + updateSceneCutAndFrameDuplicateFlags(); + unsetPictureFlags(0); + } return 0; } else if 
(!m_dupBuffer[1]->bOccupied) @@ -1379,31 +1438,54 @@ copyPicture(m_dupBuffer[1]->dupPic, pic_in); m_dupBuffer[1]->bOccupied = true; written++; + if (m_param->bHistbasedScenecut) { + setPictureFlags(1); + updateSceneCutAndFrameDuplicateFlags(); + unsetPictureFlags(1); + } } - psnrWeight = ComputePSNR(m_dupBuffer[0]->dupPic, m_dupBuffer[1]->dupPic, m_param); - - if (psnrWeight >= m_param->dupThreshold) - { - if (m_dupBuffer[0]->bDup) - { - m_dupBuffer[0]->dupPic->picStruct = tripling; - m_dupBuffer[0]->bDup = false; - read++; + if (m_param->bEnableFrameDuplication && m_param->bHistbasedScenecut) { + if (m_dupBuffer[1]->bDup == false && m_dupBuffer[1]->dupPic->analysisData.bScenecut == false) { + psnrWeight = ComputePSNR(m_dupBuffer[0]->dupPic, m_dupBuffer[1]->dupPic, m_param); + if (psnrWeight >= m_param->dupThreshold) + dropflag = true; } - else - { - m_dupBuffer[0]->dupPic->picStruct = doubling; - m_dupBuffer[0]->bDup = true; - m_dupBuffer[1]->bOccupied = false; - read++; - return 0; + else { + dropflag = true; } } - else if (m_dupBuffer[0]->bDup) + else if (m_param->bEnableFrameDuplication) { + psnrWeight = ComputePSNR(m_dupBuffer[0]->dupPic, m_dupBuffer[1]->dupPic, m_param); + if (psnrWeight >= m_param->dupThreshold) + dropflag = true; + } + + if (m_param->bEnableFrameDuplication) + { + if (dropflag) + { + if (m_dupBuffer[0]->bDup) + { + m_dupBuffer[0]->dupPic->picStruct = tripling; + m_dupBuffer[0]->bDup = false; + read++; + } + else + { + m_dupBuffer[0]->dupPic->picStruct = doubling; + m_dupBuffer[0]->bDup = true; + m_dupBuffer[1]->bOccupied = false; + read++; + return 0; + } + } + else if (m_dupBuffer[0]->bDup) m_dupBuffer[0]->bDup = false; - else - m_dupBuffer[0]->dupPic->picStruct = 0; + else + m_dupBuffer[0]->dupPic->picStruct = 0; + } + } if (read < written) @@ -1485,7 +1567,10 @@ inFrame->m_poc = ++m_pocLast; inFrame->m_userData = inputPic->userData; - inFrame->m_pts = inputPic->pts; + inFrame->m_pts = inputPic->pts; + if (m_param->bHistbasedScenecut) { + 
inFrame->m_lowres.bScenecut = inputPic->analysisData.bScenecut; + } inFrame->m_forceqp = inputPic->forceqp; inFrame->m_param = (m_reconfigure || m_reconfigureRc) ? m_latestParam : m_param; inFrame->m_picStruct = inputPic->picStruct; @@ -1613,7 +1698,7 @@ m_param->bUseRcStats = 0; } - if (m_param->bEnableFrameDuplication && ((read < written) || (m_dupBuffer[0]->dupPic->picStruct == tripling && (read <= written)))) + if ( (m_param->bEnableFrameDuplication || isScenecutEnabled) && ((read < written) || (m_dupBuffer[0]->dupPic->picStruct == tripling && (read <= written)))) { if (m_dupBuffer[0]->dupPic->picStruct == tripling) m_dupBuffer[0]->bOccupied = m_dupBuffer[1]->bOccupied = false; @@ -3162,6 +3247,7 @@ * adaptive I frame placement */ p->keyframeMax = INT_MAX; p->scenecutThreshold = 0; + p->bHistbasedScenecut = 0; } else if (p->keyframeMax <= 1) { @@ -3175,6 +3261,7 @@ p->lookaheadDepth = 0; p->bframes = 0; p->scenecutThreshold = 0; + p->bHistbasedScenecut = 0; p->bFrameAdaptive = 0; p->rc.cuTree = 0; p->bEnableWeightedPred = 0; @@ -3828,6 +3915,20 @@ m_param->searchMethod = m_param->hmeSearchMethod[2]; } } + + if (p->bHistbasedScenecut && p->scenecutThreshold) { + p->scenecutThreshold = 0; + p->bHistbasedScenecut = false; + x265_log(p, X265_LOG_WARNING, "Amibigious choice. 
disabling scene cut detection \n"); + } + else if (p->scenecutThreshold && p->edgeTransitionThreshold != 0.01) { + x265_log(p, X265_LOG_WARNING, "using scenecut-bias %.2lf for scene cut detection\n",p->scenecutBias); /* scenecutBias is a floating-point value (printed with %.2f / %.2lf elsewhere in this patch); passing it to %d is undefined */ + } + else if (p->bHistbasedScenecut && p->edgeTransitionThreshold == 0.0) { + p->edgeTransitionThreshold = 0.01; + x265_log(p, X265_LOG_INFO, "using default threshold %.2lf for scene cut detection\n", p->edgeTransitionThreshold); + } + } void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x265_picture* picIn, int paramBytes) diff -r 37648fca915b -r deaecadc4306 source/encoder/encoder.h --- a/source/encoder/encoder.h Fri Oct 11 12:45:52 2019 +0530 +++ b/source/encoder/encoder.h Tue Oct 15 12:16:17 2019 +0530 @@ -32,6 +32,8 @@ #include "nal.h" #include "framedata.h" #include "svt.h" +#include "scenecut.h" + #ifdef ENABLE_HDR10_PLUS #include "dynamicHDR10/hdr10plus.h" #endif @@ -154,6 +156,9 @@ //Flag to check whether the picture has duplicated. bool bDup; + + bool bufUpdated; + }; @@ -195,6 +200,9 @@ ThreadPool* m_threadPool; FrameEncoder* m_frameEncoder[X265_MAX_FRAME_THREADS]; + + YuvHistogram* m_hist_of_adj_frames; + sad_stats* m_sad_stats; DPB* m_dpb; Frame* m_exportedPic; FILE* m_analysisFileIn; @@ -279,6 +287,10 @@ if (m_prevTonemapPayload.payload != NULL) X265_FREE(m_prevTonemapPayload.payload); #endif + delete m_sad_stats; + m_sad_stats = NULL; + delete[] m_hist_of_adj_frames; + m_hist_of_adj_frames = NULL; }; void create(); @@ -349,6 +361,12 @@ void copyPicture(x265_picture *dest, const x265_picture *src); + void unsetPictureFlags(int index); + + void setPictureFlags(int index); + + void updateSceneCutAndFrameDuplicateFlags(); + void initRefIdx(); void analyseRefIdx(int *numRefIdx); void updateRefIdx(); @@ -364,6 +382,7 @@ void initSPS(SPS *sps); void initPPS(PPS *pps); }; + } #endif // ifndef X265_ENCODER_H diff -r 37648fca915b -r deaecadc4306 source/encoder/ratecontrol.cpp --- a/source/encoder/ratecontrol.cpp Fri Oct 
11 12:45:52 2019 +0530 +++ b/source/encoder/ratecontrol.cpp Tue Oct 15 12:16:17 2019 +0530 @@ -493,6 +493,7 @@ CMP_OPT_FIRST_PASS("open-gop", m_param->bOpenGOP); CMP_OPT_FIRST_PASS(" keyint", m_param->keyframeMax); CMP_OPT_FIRST_PASS("scenecut", m_param->scenecutThreshold); + CMP_OPT_FIRST_PASS("hist-threshold", m_param->edgeTransitionThreshold); CMP_OPT_FIRST_PASS("intra-refresh", m_param->bIntraRefresh); if (m_param->bMultiPassOptRPS) { @@ -1183,6 +1184,7 @@ m_param->rc.bStatRead = 0; m_param->bFrameAdaptive = 0; m_param->scenecutThreshold = 0; + m_param->bHistbasedScenecut = false; m_param->rc.cuTree = 0; if (m_param->bframes > 1) m_param->bframes = 1; @@ -2173,7 +2175,7 @@ if (m_isVbv && m_currentSatd > 0 && curFrame) { if (m_param->lookaheadDepth || m_param->rc.cuTree || - m_param->scenecutThreshold || + (m_param->scenecutThreshold || m_param->bHistbasedScenecut) || (m_param->bFrameAdaptive && m_param->bframes)) { /* Lookahead VBV: If lookahead is done, raise the quantizer as necessary diff -r 37648fca915b -r deaecadc4306 source/encoder/slicetype.cpp --- a/source/encoder/slicetype.cpp Fri Oct 11 12:45:52 2019 +0530 +++ b/source/encoder/slicetype.cpp Tue Oct 15 12:16:17 2019 +0530 @@ -30,6 +30,7 @@ #include "primitives.h" #include "lowres.h" #include "mv.h" +#include "scenecut.h" #include "slicetype.h" #include "motion.h" @@ -114,8 +115,8 @@ //Applying Gaussian filter on the picture src = (pixel*)curFrame->m_fencPic->m_picOrg[0]; refPic = curFrame->m_gaussianPic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX; + edgePic = pic1 + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX; pixel pixelValue = 0; - for (int rowNum = 0; rowNum < height; rowNum++) { for (int colNum = 0; colNum < width; colNum++) @@ -127,7 +128,8 @@ 1 [4 9 12 9 4] --- [5 12 15 12 5] 159 [4 9 12 9 4] - [2 4 5 4 2]*/ + [2 4 5 4 2] + */ const intptr_t rowOne = (rowNum - 2)*stride, colOne = colNum - 2; const intptr_t rowTwo = 
(rowNum - 1)*stride, colTwo = colNum - 1; @@ -145,52 +147,7 @@ } } } - -#if HIGH_BIT_DEPTH //10-bit build - float threshold = 1023; - pixel whitePixel = 1023; -#else - float threshold = 255; - pixel whitePixel = 255; -#endif -#define PI 3.14159265 - - float gradientH = 0, gradientV = 0, radians = 0, theta = 0; - float gradientMagnitude = 0; - pixel blackPixel = 0; - edgePic = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX; - //Applying Sobel filter on the gaussian filtered picture - for (int rowNum = 0; rowNum < height; rowNum++) - { - for (int colNum = 0; colNum < width; colNum++) - { - edgeTheta[(rowNum*stride) + colNum] = 0; - if ((rowNum != 0) && (colNum != 0) && (rowNum != height - 1) && (colNum != width - 1)) //Ignoring the border pixels of the picture - { - /*Horizontal and vertical gradients - [ -3 0 3 ] [-3 -10 -3 ] - gH = [ -10 0 10] gV = [ 0 0 0 ] - [ -3 0 3 ] [ 3 10 3 ]*/ - - const intptr_t rowOne = (rowNum - 1)*stride, colOne = colNum -1; - const intptr_t rowTwo = rowNum * stride, colTwo = colNum; - const intptr_t rowThree = (rowNum + 1)*stride, colThree = colNum + 1; - const intptr_t index = (rowNum*stride) + colNum; - - gradientH = (float)(-3 * refPic[rowOne + colOne] + 3 * refPic[rowOne + colThree] - 10 * refPic[rowTwo + colOne] + 10 * refPic[rowTwo + colThree] - 3 * refPic[rowThree + colOne] + 3 * refPic[rowThree + colThree]); - gradientV = (float)(-3 * refPic[rowOne + colOne] - 10 * refPic[rowOne + colTwo] - 3 * refPic[rowOne + colThree] + 3 * refPic[rowThree + colOne] + 10 * refPic[rowThree + colTwo] + 3 * refPic[rowThree + colThree]); - - gradientMagnitude = sqrtf(gradientH * gradientH + gradientV * gradientV); - radians = atan2(gradientV, gradientH); - theta = (float)((radians * 180) / PI); - if (theta < 0) - theta = 180 + theta; - edgeTheta[(rowNum*stride) + colNum] = (pixel)theta; - - edgePic[index] = gradientMagnitude >= threshold ? 
whitePixel : blackPixel; - } - } - } + computeEdge(edgePic, refPic, edgeTheta, stride, height, width); } //Find the angle of a block by averaging the pixel angles @@ -1471,7 +1428,7 @@ if (m_lastNonB && !m_param->rc.bStatRead && ((m_param->bFrameAdaptive && m_param->bframes) || - m_param->rc.cuTree || m_param->scenecutThreshold || + m_param->rc.cuTree || m_param->scenecutThreshold || m_param->bHistbasedScenecut || (m_param->lookaheadDepth && m_param->rc.vbvBufferSize))) { slicetypeAnalyse(frames, false); @@ -1962,10 +1919,15 @@ int numBFrames = 0; int numAnalyzed = numFrames; - bool isScenecut = scenecut(frames, 0, 1, true, origNumFrames); + bool isScenecut = false; /* When scenecut threshold is set, use scenecut detection for I frame placements */ - if (m_param->scenecutThreshold && isScenecut) + if (m_param->scenecutThreshold) + isScenecut = scenecut(frames, 0, 1, true, origNumFrames); + else if (m_param->bHistbasedScenecut) + isScenecut = frames[1]->bScenecut; + + if (isScenecut) { frames[1]->sliceType = X265_TYPE_I; return; @@ -1976,14 +1938,24 @@ m_extendGopBoundary = false; for (int i = m_param->bframes + 1; i < origNumFrames; i += m_param->bframes + 1) { - scenecut(frames, i, i + 1, true, origNumFrames); + if (m_param->scenecutThreshold) + scenecut(frames, i, i + 1, true, origNumFrames); + for (int j = i + 1; j <= X265_MIN(i + m_param->bframes + 1, origNumFrames); j++) { - if (frames[j]->bScenecut && scenecutInternal(frames, j - 1, j, true) ) - { - m_extendGopBoundary = true; - break; - } + if (m_param->scenecutThreshold) + { + if (frames[j]->bScenecut && scenecutInternal(frames, j - 1, j, true)) + { + m_extendGopBoundary = true; + break; + } + } + else if(m_param->bHistbasedScenecut && frames[j]->bScenecut) + { + m_extendGopBoundary = true; + break; + } } if (m_extendGopBoundary) break; @@ -2088,13 +2060,23 @@ { for (int j = 1; j < numBFrames + 1; j++) { - if (scenecut(frames, j, j + 1, false, origNumFrames) || - (bForceRADL && (frames[j]->frameNum == 
preRADL))) - { - frames[j]->sliceType = X265_TYPE_P; - numAnalyzed = j; - break; + if (m_param->bHistbasedScenecut) { + if (frames[j]->bScenecut || (bForceRADL && (frames[j]->frameNum == preRADL))) + { + frames[j]->sliceType = X265_TYPE_P; + numAnalyzed = j; + break; + } } + else if (m_param->scenecutThreshold){ + if ( scenecut(frames, j, j + 1, false, origNumFrames) || (bForceRADL && (frames[j]->frameNum == preRADL)) ) + { + frames[j]->sliceType = X265_TYPE_P; + numAnalyzed = j; + break; + } + } + } } resetStart = bKeyframe ? 1 : X265_MIN(numBFrames + 2, numAnalyzed + 1); diff -r 37648fca915b -r deaecadc4306 source/encoder/slicetype.h --- a/source/encoder/slicetype.h Fri Oct 11 12:45:52 2019 +0530 +++ b/source/encoder/slicetype.h Tue Oct 15 12:16:17 2019 +0530 @@ -43,6 +43,14 @@ #define AQ_EDGE_BIAS 0.5 #define EDGE_INCLINATION 45 +#ifdef HIGH_BIT_DEPTH +#define edge_threshold 1023.0 +#define whitePixel 1023.0 +#else +#define edge_threshold 255.0 +#define pixel whitePixel 255.0 +#endif + /* Thread local data for lookahead tasks */ struct LookaheadTLD { diff -r 37648fca915b -r deaecadc4306 source/test/regression-tests.txt --- a/source/test/regression-tests.txt Fri Oct 11 12:45:52 2019 +0530 +++ b/source/test/regression-tests.txt Tue Oct 15 12:16:17 2019 +0530 @@ -157,6 +157,9 @@ ducks_take_off_420_720p50.y4m,--preset medium --aq-mode 4 --crf 22 --no-cutree ducks_take_off_420_1_720p50.y4m,--preset medium --selective-sao 4 --sao --crf 20 Traffic_4096x2048_30p.y4m, --preset medium --frame-dup --dup-threshold 60 --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000 +sintel_trailer_2k_1920x1080_24.yuv, --preset medium --hist-scenecut --hist-threshold 0.01 +Traffic_4096x2048_30p.y4m, --preset medium --frame-dup --dup-threshold 60 --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000 --hist-scenecut --hist-threshold 0.01 +sintel_trailer_2k_1920x1080_24.yuv, --preset medium --scenecut 40 --scenecut-bias 20 # Main12 intraCost overflow bug test 
720p50_parkrun_ter.y4m,--preset medium diff -r 37648fca915b -r deaecadc4306 source/x265.h --- a/source/x265.h Fri Oct 11 12:45:52 2019 +0530 +++ b/source/x265.h Tue Oct 15 12:16:17 2019 +0530 @@ -210,7 +210,9 @@ uint32_t numCUsInFrame; uint32_t numPartitions; uint32_t depthBytes; - int bScenecut; + bool bScenecut; + double edgeSadValue; + double chromaSadValue; x265_weight_param* wt; x265_analysis_inter_data* interData; x265_analysis_intra_data* intraData; @@ -291,6 +293,9 @@ char sliceType; int bScenecut; double ipCostRatio; + double yedgeSadValue; + double chromaSadValue; + int frameLatency; x265_cu_stats cuStats; x265_pu_stats puStats; @@ -465,6 +470,9 @@ //Dolby Vision RPU metadata x265_dolby_vision_rpu rpu; + //Flag to determine the latest frame in the buffer + bool bufUpdated; + int fieldNum; //SEI picture structure message @@ -1017,8 +1025,9 @@ * decisions. Default is 0 - disabled. 1 is the same as 0. Max 16 */ int lookaheadSlices; - /* An arbitrary threshold which determines how aggressively the lookahead - * should detect scene cuts. The default (40) is recommended. */ + /* An arbitrary threshold which determines how aggressively the lookahead + * should detect scene cuts. The default (40) is recommended. + * Used for encoding cost based scenecut detection */ int scenecutThreshold; /* Replace keyframes by using a column of intra blocks that move across the video @@ -1803,6 +1812,7 @@ /*Emit content light level info SEI*/ int bEmitCLL; + /* * Signals picture structure SEI timing message for every frame @@ -1819,6 +1829,17 @@ /*Input sequence bit depth. 
It can be either 8bit, 10bit or 12bit.*/ int sourceBitDepth; + + /* A genuine threshold which determines whether a frame is a scenecut or not + * when compared against edge and color SAD values of a frame's histograms. Default 0.01 + * Range: real number in range (0,2) + * Used for histogram based scene cut detection */ + double edgeTransitionThreshold; + + /* Enables the improved scenecut detection algorithm to detect scenecuts for slice type + decision and rate control */ + bool bHistbasedScenecut; + } x265_param; /* x265_param_alloc: * Allocates an x265_param instance. The returned param structure is not diff -r 37648fca915b -r deaecadc4306 source/x265cli.h --- a/source/x265cli.h Fri Oct 11 12:45:52 2019 +0530 +++ b/source/x265cli.h Tue Oct 15 12:16:17 2019 +0530 @@ -129,6 +129,9 @@ { "scenecut", required_argument, NULL, 0 }, { "no-scenecut", no_argument, NULL, 0 }, { "scenecut-bias", required_argument, NULL, 0 }, + { "hist-scenecut", no_argument, NULL, 0}, + { "no-hist-scenecut", no_argument, NULL, 0}, + { "hist-threshold", required_argument, NULL, 0}, { "fades", no_argument, NULL, 0 }, { "no-fades", no_argument, NULL, 0 }, { "radl", required_argument, NULL, 0 }, @@ -485,7 +488,10 @@ H0(" --gop-lookahead <integer> Extends gop boundary if a scenecut is found within this from keyint boundary. Default 0\n"); H0(" --no-scenecut Disable adaptive I-frame decision\n"); H0(" --scenecut <integer> How aggressively to insert extra I-frames. Default %d\n", param->scenecutThreshold); - H1(" --scenecut-bias <0..100.0> Bias for scenecut detection. Default %.2f\n", param->scenecutBias); + H0(" --hist-scenecut ..... Enables improved scene-cut detection using histogram based algorithm."); + H0(" --no-hist-scenecut Disables improved scene-cut detection using histogram based algorithm. "); + H0(" --scenecut-bias <0..100.0> Bias for scenecut detection.
Default %.2f\n", param->scenecutBias); + H0(" --hist-threshold <0.0..2.0> Threshold for histogram based scenecut detection Default %.2f\n", param->edgeTransitionThreshold); H0(" --[no-]fades Enable detection and handling of fade-in regions. Default %s\n", OPT(param->bEnableFades)); H0(" --radl <integer> Number of RADL pictures allowed in front of IDR. Default %d\n", param->radl); H0(" --intra-refresh Use Periodic Intra Refresh instead of IDR frames\n");
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel