From 39fb570c61906a087198c960632a918907eafb2a Mon Sep 17 00:00:00 2001
From: ashok2022 <ashok@multicorewareinc.com>
Date: Wed, 26 Oct 2022 12:25:21 +0530
Subject: [PATCH] Compute picture variance, pixel intensity for histogram based
 scene change

Signed-off-by: Snehaa Giridharan <snehaa@multicorewareinc.com>
---
 source/common/frame.cpp      |   3 +-
 source/common/lowres.cpp     |  94 +++++++++-
 source/common/lowres.h       |  25 ++-
 source/encoder/slicetype.cpp | 320 +++++++++++++++++++++++++++++++++++
 source/encoder/slicetype.h   |  48 ++++++
 5 files changed, 485 insertions(+), 5 deletions(-)

diff --git a/source/common/frame.cpp b/source/common/frame.cpp
index f51270bf0..d7f8ce2a8 100644
--- a/source/common/frame.cpp
+++ b/source/common/frame.cpp
@@ -330,7 +330,8 @@ void Frame::destroy()
         X265_FREE(m_addOnPrevChange);
         m_addOnPrevChange = NULL;
     }
-    m_lowres.destroy();
+
+    m_lowres.destroy(m_param);
     X265_FREE(m_rcData);
 
     if (m_param->bDynamicRefine)
diff --git a/source/common/lowres.cpp b/source/common/lowres.cpp
index 56b98b364..16ff02509 100644
--- a/source/common/lowres.cpp
+++ b/source/common/lowres.cpp
@@ -28,6 +28,28 @@
 
 using namespace X265_NS;
 
+/* Halve a plane in both dimensions: every output pixel is the rounded average of a 2x2
+ * input block, built from two vertical averages followed by one horizontal average.
+ * width/height are the OUTPUT dimensions; both strides are added to pixel pointers. */
+static
+void frame_lowres_core(const pixel* src0, pixel* dst0,
+    intptr_t src_stride, intptr_t dst_stride, int width, int height)
+{
+    const pixel* top = src0;
+    pixel* out = dst0;
+    for (int row = 0; row < height; row++, top += src_stride * 2, out += dst_stride)
+    {
+        const pixel* bottom = top + src_stride;
+        for (int col = 0; col < width; col++)
+        {
+            // two-stage rounding is slower than naive bilinear, but matches asm
+            const int left = (top[2 * col] + bottom[2 * col] + 1) >> 1;
+            const int right = (top[2 * col + 1] + bottom[2 * col + 1] + 1) >> 1;
+            out[col] = (pixel)((left + right + 1) >> 1);
+        }
+    }
+}
+
 bool PicQPAdaptationLayer::create(uint32_t width, uint32_t height, uint32_t partWidth, uint32_t partHeight, uint32_t numAQPartInWidthExt, uint32_t numAQPartInHeightExt)
 {
     aqPartWidth = partWidth;
@@ -193,13 +215,45 @@ bool Lowres::create(x265_param* param, PicYuv *origPic, uint32_t qgSize)
     if (param->rc.frameSegment)
         lowresEdgePlane = X265_MALLOC(pixel, lumaStride * (lines + (origPic->m_lumaMarginY * 2)));
 
+    if (param->bHistBasedSceneCut)
+    {
+        quarterSampleLowResWidth = widthFullRes / 4;
+        quarterSampleLowResHeight = heightFullRes / 4;
+        quarterSampleLowResOriginX = 16;
+        quarterSampleLowResOriginY = 16;
+        quarterSampleLowResStrideY = quarterSampleLowResWidth + 2 * quarterSampleLowResOriginX; // left + right margin
+
+        size_t quarterSampleLowResPlanesize = (size_t)quarterSampleLowResStrideY * (quarterSampleLowResHeight + 2 * quarterSampleLowResOriginY);
+        /* allocate quarter sampled lowres buffers */
+        CHECKED_MALLOC_ZERO(quarterSampleLowResBuffer, pixel, quarterSampleLowResPlanesize);
+
+        // Histograms are indexed [segX][segY][plane][bin]. Counts are ELEMENT counts; a failed allocation jumps to fail.
+        CHECKED_MALLOC_ZERO(picHistogram, uint32_t***, NUMBER_OF_SEGMENTS_IN_WIDTH);
+        CHECKED_MALLOC_ZERO(picHistogram[0], uint32_t**, NUMBER_OF_SEGMENTS_IN_WIDTH * NUMBER_OF_SEGMENTS_IN_HEIGHT);
+        for (uint32_t wd = 1; wd < NUMBER_OF_SEGMENTS_IN_WIDTH; wd++) {
+            picHistogram[wd] = picHistogram[0] + wd * NUMBER_OF_SEGMENTS_IN_HEIGHT;
+        }
+
+        for (uint32_t regionInPictureWidthIndex = 0; regionInPictureWidthIndex < NUMBER_OF_SEGMENTS_IN_WIDTH; regionInPictureWidthIndex++)
+        {
+            for (uint32_t regionInPictureHeightIndex = 0; regionInPictureHeightIndex < NUMBER_OF_SEGMENTS_IN_HEIGHT; regionInPictureHeightIndex++)
+            {
+                CHECKED_MALLOC_ZERO(picHistogram[regionInPictureWidthIndex][regionInPictureHeightIndex], uint32_t*, 3); // one pointer per plane (Y, Cb, Cr)
+                CHECKED_MALLOC_ZERO(picHistogram[regionInPictureWidthIndex][regionInPictureHeightIndex][0], uint32_t, 3 * HISTOGRAM_NUMBER_OF_BINS);
+                for (uint32_t wd = 1; wd < 3; wd++) {
+                    picHistogram[regionInPictureWidthIndex][regionInPictureHeightIndex][wd] = picHistogram[regionInPictureWidthIndex][regionInPictureHeightIndex][0] + wd * HISTOGRAM_NUMBER_OF_BINS;
+                }
+            }
+        }
+    }
+
     return true;
 
 fail:
     return false;
 }
 
-void Lowres::destroy()
+void Lowres::destroy(x265_param* param)
 {
     X265_FREE(buffer[0]);
     if(bEnableHME)
@@ -237,7 +291,8 @@ void Lowres::destroy()
     X265_FREE(invQscaleFactor8x8);
     X265_FREE(edgeInclined);
     X265_FREE(qpAqMotionOffset);
-    X265_FREE(blockVariance);
+    if (param->bDynamicRefine || param->bEnableFades)
+        X265_FREE(blockVariance);
 
     if (maxAQDepth > 0)
     {
@@ -258,6 +313,29 @@ void Lowres::destroy()
 
         delete[] pAQLayer;
     }
+
+    // Histograms: tolerate a NULL or partially built table so that a failed create() can still be cleaned up
+    if (param->bHistBasedSceneCut)
+    {
+        for (uint32_t segmentInFrameWidthIdx = 0; picHistogram && segmentInFrameWidthIdx < NUMBER_OF_SEGMENTS_IN_WIDTH; segmentInFrameWidthIdx++)
+        {
+            if (picHistogram[segmentInFrameWidthIdx])
+            {
+                for (uint32_t segmentInFrameHeightIdx = 0; segmentInFrameHeightIdx < NUMBER_OF_SEGMENTS_IN_HEIGHT; segmentInFrameHeightIdx++)
+                {
+                    if (picHistogram[segmentInFrameWidthIdx][segmentInFrameHeightIdx])
+                        X265_FREE(picHistogram[segmentInFrameWidthIdx][segmentInFrameHeightIdx][0]);
+                    X265_FREE(picHistogram[segmentInFrameWidthIdx][segmentInFrameHeightIdx]);
+                }
+            }
+        }
+        if (picHistogram)
+            X265_FREE(picHistogram[0]);
+        X265_FREE(picHistogram);
+        picHistogram = NULL; // avoid dangling pointers if destroy() is reached again
+        X265_FREE(quarterSampleLowResBuffer);
+        quarterSampleLowResBuffer = NULL;
+    }
 }
 // (re) initialize lowres state
 void Lowres::init(PicYuv *origPic, int poc)
@@ -314,4 +392,16 @@ void Lowres::init(PicYuv *origPic, int poc)
     }
 
     fpelPlane[0] = lowresPlane[0];
+
+    if (origPic->m_param->bHistBasedSceneCut)
+    {
+        // Quarter Sampled Input Picture Formation
+        // TO DO: Replace with ASM function
+        frame_lowres_core(
+            lowresPlane[0],
+            quarterSampleLowResBuffer + quarterSampleLowResOriginX + quarterSampleLowResOriginY * quarterSampleLowResStrideY,
+            lumaStride,
+            quarterSampleLowResStrideY,
+            widthFullRes / 4, heightFullRes / 4);
+    }
 }
diff --git a/source/common/lowres.h b/source/common/lowres.h
index 8adb93a81..c2ae270e3 100644
--- a/source/common/lowres.h
+++ b/source/common/lowres.h
@@ -32,6 +32,10 @@
 namespace X265_NS {
 // private namespace
 
+#define HISTOGRAM_NUMBER_OF_BINS         256
+#define NUMBER_OF_SEGMENTS_IN_WIDTH      4
+#define NUMBER_OF_SEGMENTS_IN_HEIGHT     4
+
 struct ReferencePlanes
 {
     ReferencePlanes() { memset(this, 0, sizeof(ReferencePlanes)); }
@@ -233,13 +237,30 @@ struct Lowres : public ReferencePlanes
     uint32_t heightFullRes;
     uint32_t m_maxCUSize;
     uint32_t m_qgSize;
-    
+
     uint16_t* propagateCost;
     double    weightedCostDelta[X265_BFRAME_MAX + 2];
     ReferencePlanes weightedRef[X265_BFRAME_MAX + 2];
 
+    /* For hist-based scenecut */
+    int          quarterSampleLowResWidth;     // width of 1/4 lowres frame in pixels
+    int          quarterSampleLowResHeight;    // height of 1/4 lowres frame in pixels
+    int          quarterSampleLowResStrideY;
+    int          quarterSampleLowResOriginX;
+    int          quarterSampleLowResOriginY;
+    pixel       *quarterSampleLowResBuffer;
+    bool         bHistScenecutAnalyzed;
+
+    uint16_t     picAvgVariance;
+    uint16_t     picAvgVarianceCb;
+    uint16_t     picAvgVarianceCr;
+
+    uint32_t ****picHistogram;
+    uint64_t     averageIntensityPerSegment[NUMBER_OF_SEGMENTS_IN_WIDTH][NUMBER_OF_SEGMENTS_IN_HEIGHT][3];
+    uint8_t      averageIntensity[3];
+
     bool create(x265_param* param, PicYuv *origPic, uint32_t qgSize);
-    void destroy();
+    void destroy(x265_param* param);
     void init(PicYuv *origPic, int poc);
 };
 }
diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
index 1ff5abcc8..9343593cc 100644
--- a/source/encoder/slicetype.cpp
+++ b/source/encoder/slicetype.cpp
@@ -45,6 +45,14 @@ using namespace X265_NS;
 
 namespace {
 
+uint32_t acEnergyVarHist(uint64_t sum_ssd, int shift)
+{
+    // sum_ssd carries the sum in its low 32 bits and the ssd term in its high 32 bits
+    const uint32_t pixelSum = (uint32_t)(sum_ssd & 0xFFFFFFFFu);
+    const uint32_t squareSum = (uint32_t)(sum_ssd >> 32);
+    return squareSum - (uint32_t)(((uint64_t)pixelSum * pixelSum) >> shift);
+}
+
 /* Compute variance to derive AC energy of each block */
 inline uint32_t acEnergyVar(Frame *curFrame, uint64_t sum_ssd, int shift, int plane)
 {
@@ -1050,6 +1058,30 @@ Lookahead::Lookahead(x265_param *param, ThreadPool* pool)
     m_countPreLookahead = 0;
 #endif
 
+    // Three [segX][segY] tables (Cb, Cr, then luma). Each is an array of row pointers into one
+    // contiguous, zero-filled block of NUMBER_OF_SEGMENTS_IN_WIDTH * NUMBER_OF_SEGMENTS_IN_HEIGHT
+    // counters. X265_MALLOC takes an ELEMENT count, so its second argument carries no sizeof().
+    m_accHistDiffRunningAvgCb = X265_MALLOC(uint32_t*, NUMBER_OF_SEGMENTS_IN_WIDTH);
+    m_accHistDiffRunningAvgCb[0] = X265_MALLOC(uint32_t, NUMBER_OF_SEGMENTS_IN_WIDTH * NUMBER_OF_SEGMENTS_IN_HEIGHT);
+    memset(m_accHistDiffRunningAvgCb[0], 0, sizeof(uint32_t) * NUMBER_OF_SEGMENTS_IN_WIDTH * NUMBER_OF_SEGMENTS_IN_HEIGHT);
+    for (uint32_t w = 1; w < NUMBER_OF_SEGMENTS_IN_WIDTH; w++)
+        m_accHistDiffRunningAvgCb[w] = m_accHistDiffRunningAvgCb[0] + w * NUMBER_OF_SEGMENTS_IN_HEIGHT;
+
+    m_accHistDiffRunningAvgCr = X265_MALLOC(uint32_t*, NUMBER_OF_SEGMENTS_IN_WIDTH);
+    m_accHistDiffRunningAvgCr[0] = X265_MALLOC(uint32_t, NUMBER_OF_SEGMENTS_IN_WIDTH * NUMBER_OF_SEGMENTS_IN_HEIGHT);
+    memset(m_accHistDiffRunningAvgCr[0], 0, sizeof(uint32_t) * NUMBER_OF_SEGMENTS_IN_WIDTH * NUMBER_OF_SEGMENTS_IN_HEIGHT);
+    for (uint32_t w = 1; w < NUMBER_OF_SEGMENTS_IN_WIDTH; w++)
+        m_accHistDiffRunningAvgCr[w] = m_accHistDiffRunningAvgCr[0] + w * NUMBER_OF_SEGMENTS_IN_HEIGHT;
+
+    m_accHistDiffRunningAvg = X265_MALLOC(uint32_t*, NUMBER_OF_SEGMENTS_IN_WIDTH);
+    m_accHistDiffRunningAvg[0] = X265_MALLOC(uint32_t, NUMBER_OF_SEGMENTS_IN_WIDTH * NUMBER_OF_SEGMENTS_IN_HEIGHT);
+    memset(m_accHistDiffRunningAvg[0], 0, sizeof(uint32_t) * NUMBER_OF_SEGMENTS_IN_WIDTH * NUMBER_OF_SEGMENTS_IN_HEIGHT);
+    for (uint32_t w = 1; w < NUMBER_OF_SEGMENTS_IN_WIDTH; w++)
+        m_accHistDiffRunningAvg[w] = m_accHistDiffRunningAvg[0] + w * NUMBER_OF_SEGMENTS_IN_HEIGHT;
+
+    m_resetRunningAvg = true;
+
+    m_segmentCountThreshold = (uint32_t)(((float)((NUMBER_OF_SEGMENTS_IN_WIDTH * NUMBER_OF_SEGMENTS_IN_HEIGHT) * 50) / 100) + 0.5);
 }
 
 #if DETAILED_CU_STATS
@@ -1415,6 +1447,290 @@ double computeEdgeIntensity(pixel *inPlane, int width, int height, intptr_t stri
     return (count / (width * height)) * 100;
 }
 
+uint32_t LookaheadTLD::calcVariance(pixel* inpSrc, intptr_t stride, intptr_t blockOffset, uint32_t plane)
+{
+    // plane 0 is measured on an 8x8 block, any other plane on a 4x4 block; the shift handed
+    // to acEnergyVarHist is log2 of the block's pixel count (64 -> 6, 16 -> 4)
+    pixel* const block = inpSrc + blockOffset;
+    const bool isPlaneZero = (plane == 0);
+    const int sizeIdx = isPlaneZero ? BLOCK_8x8 : BLOCK_4x4;
+    const int shift = isPlaneZero ? 6 : 4;
+    const uint32_t blockVar = acEnergyVarHist(primitives.cu[sizeIdx].var(block, stride), shift);
+
+    x265_emms();
+    return blockVar;
+}
+
+/* Compute the average block variance of the luma, Cb and Cr planes of the source picture and
+** store them in m_lowres.picAvgVariance, picAvgVarianceCb and picAvgVarianceCr. Each row total
+** is divided by the plane width in pixels and narrowed to 16 bits before being accumulated. */
+void LookaheadTLD::computePictureStatistics(Frame *curFrame)
+{
+    int maxCol = curFrame->m_fencPic->m_picWidth;
+    int maxRow = curFrame->m_fencPic->m_picHeight;
+    intptr_t inpStride = curFrame->m_fencPic->m_stride;
+
+    // Variance
+    uint64_t picTotVariance = 0;
+    uint32_t variance;
+
+    // Luma: one variance per 8x8 block (calcVariance with plane 0)
+    pixel* src = curFrame->m_fencPic->m_picOrg[0];
+
+    for (int blockY = 0; blockY < maxRow; blockY += 8)
+    {
+        uint64_t rowVariance = 0;
+        for (int blockX = 0; blockX < maxCol; blockX += 8)
+        {
+            intptr_t blockOffsetLuma = blockX + (blockY * inpStride);
+
+            variance = calcVariance(
+                src,
+                inpStride,
+                blockOffsetLuma, 0);
+
+            // accumulate the block variances of this row of blocks
+            rowVariance += variance;
+        }
+        picTotVariance += (uint16_t)(rowVariance / maxCol);
+    }
+
+    curFrame->m_lowres.picAvgVariance = (uint16_t)(picTotVariance / maxRow);
+
+    // Collect chroma variance
+    int hShift = curFrame->m_fencPic->m_hChromaShift;
+    int vShift = curFrame->m_fencPic->m_vChromaShift;
+
+    int maxColChroma = curFrame->m_fencPic->m_picWidth >> hShift;
+    int maxRowChroma = curFrame->m_fencPic->m_picHeight >> vShift;
+    intptr_t cStride = curFrame->m_fencPic->m_strideC;
+
+    pixel* srcCb = curFrame->m_fencPic->m_picOrg[1];
+
+    picTotVariance = 0;
+    for (int blockY = 0; blockY < maxRowChroma; blockY += 4)
+    {
+        uint64_t rowVariance = 0;
+        for (int blockX = 0; blockX < maxColChroma; blockX += 4)
+        {
+            intptr_t blockOffsetChroma = blockX + blockY * cStride;
+
+            variance = calcVariance(
+                srcCb,
+                cStride,
+                blockOffsetChroma, 1);
+
+            // accumulate the 4x4 block variances of this row of blocks
+            rowVariance += variance;
+        }
+        picTotVariance += (uint16_t)(rowVariance / maxColChroma);
+    }
+
+    curFrame->m_lowres.picAvgVarianceCb = (uint16_t)(picTotVariance / maxRowChroma);
+
+    // Cr: same 4x4 scan as Cb, on the third plane
+    pixel* srcCr = curFrame->m_fencPic->m_picOrg[2];
+
+    picTotVariance = 0;
+    for (int blockY = 0; blockY < maxRowChroma; blockY += 4)
+    {
+        uint64_t rowVariance = 0;
+        for (int blockX = 0; blockX < maxColChroma; blockX += 4)
+        {
+            intptr_t blockOffsetChroma = blockX + blockY * cStride;
+
+            variance = calcVariance(
+                srcCr,
+                cStride,
+                blockOffsetChroma, 2);
+
+            // accumulate the 4x4 block variances of this row of blocks
+            rowVariance += variance;
+        }
+        picTotVariance += (uint16_t)(rowVariance / maxColChroma);
+    }
+
+    curFrame->m_lowres.picAvgVarianceCr = (uint16_t)(picTotVariance / maxRowChroma);
+}
+
+/* Add every dsFactor-th pixel (in both directions) of an inputWidth x inputHeight region to
+*  histogram[] and return the sum of the sampled pixels in *sum. histogram[] is NOT cleared
+*  here. NOTE(review): bins are indexed by the raw pixel value - confirm for >8-bit builds. */
+void LookaheadTLD::calculateHistogram(
+    pixel     *inputSrc,
+    uint32_t   inputWidth,
+    uint32_t   inputHeight,
+    intptr_t   stride,
+    uint8_t    dsFactor,
+    uint32_t  *histogram,
+    uint64_t  *sum)
+
+{
+    *sum = 0;
+    if (!dsFactor)
+        return; // a zero step would never advance the loops below
+    // advance dsFactor rows per sampled row; a plain multiply holds for every dsFactor, not only 1, 2 and 4
+    const intptr_t rowStep = stride * dsFactor;
+    for (uint32_t verticalIdx = 0; verticalIdx < inputHeight; verticalIdx += dsFactor, inputSrc += rowStep)
+    {
+        for (uint32_t horizontalIdx = 0; horizontalIdx < inputWidth; horizontalIdx += dsFactor)
+        {
+            ++(histogram[inputSrc[horizontalIdx]]);
+            *sum += inputSrc[horizontalIdx];
+        }
+    }
+}
+
+/* Build the Cb and Cr histograms and average intensity of every picture segment, and add each
+*  segment's scaled pixel sum to *sumAverageIntensityCb / *sumAverageIntensityCr (not reset here).
+*  Segment geometry is halved on both axes, i.e. presumably 4:2:0 chroma - TODO confirm. */
+void LookaheadTLD::computeIntensityHistogramBinsChroma(
+    Frame    *curFrame,
+    uint64_t *sumAverageIntensityCb,
+    uint64_t *sumAverageIntensityCr)
+{
+    uint64_t    sum;
+    uint8_t     dsFactor = 4;
+
+    uint32_t segmentWidth = curFrame->m_lowres.widthFullRes / NUMBER_OF_SEGMENTS_IN_WIDTH;
+    uint32_t segmentHeight = curFrame->m_lowres.heightFullRes / NUMBER_OF_SEGMENTS_IN_HEIGHT;
+
+    for (uint32_t segmentInFrameWidthIndex = 0; segmentInFrameWidthIndex < NUMBER_OF_SEGMENTS_IN_WIDTH; segmentInFrameWidthIndex++)
+    {
+        for (uint32_t segmentInFrameHeightIndex = 0; segmentInFrameHeightIndex < NUMBER_OF_SEGMENTS_IN_HEIGHT; segmentInFrameHeightIndex++)
+        {
+            // Initialize bins to 1
+            for (uint32_t cuIndex = 0; cuIndex < 256; cuIndex++) {
+                curFrame->m_lowres.picHistogram[segmentInFrameWidthIndex][segmentInFrameHeightIndex][1][cuIndex] = 1;
+                curFrame->m_lowres.picHistogram[segmentInFrameWidthIndex][segmentInFrameHeightIndex][2][cuIndex] = 1;
+            }
+
+            uint32_t segmentWidthOffset = (segmentInFrameWidthIndex == NUMBER_OF_SEGMENTS_IN_WIDTH - 1) ?
+                curFrame->m_lowres.widthFullRes - (NUMBER_OF_SEGMENTS_IN_WIDTH * segmentWidth) : 0;
+            uint32_t segmentHeightOffset = (segmentInFrameHeightIndex == NUMBER_OF_SEGMENTS_IN_HEIGHT - 1) ?
+                curFrame->m_lowres.heightFullRes - (NUMBER_OF_SEGMENTS_IN_HEIGHT * segmentHeight) : 0;
+            // Full-resolution size of this segment; the last column/row absorbs the division remainder
+            const uint32_t curSegmentWidth = segmentWidth + segmentWidthOffset;
+            const uint32_t curSegmentHeight = segmentHeight + segmentHeightOffset;
+            // U Histogram
+            calculateHistogram(
+                curFrame->m_fencPic->m_picOrg[1] + ((segmentInFrameWidthIndex * segmentWidth) >> 1) + (((segmentInFrameHeightIndex * segmentHeight) >> 1) * curFrame->m_fencPic->m_strideC),
+                curSegmentWidth >> 1,
+                curSegmentHeight >> 1,
+                curFrame->m_fencPic->m_strideC,
+                dsFactor,
+                curFrame->m_lowres.picHistogram[segmentInFrameWidthIndex][segmentInFrameHeightIndex][1],
+                &sum);
+
+            sum = (sum << dsFactor); // x16: one sample stands for a 4x4 block (1 << 4 == 4 * 4 only because dsFactor is 4)
+            *sumAverageIntensityCb += sum;
+            curFrame->m_lowres.averageIntensityPerSegment[segmentInFrameWidthIndex][segmentInFrameHeightIndex][1] =
+                (uint8_t)((sum + ((curSegmentWidth * curSegmentHeight) >> 3)) / ((curSegmentWidth * curSegmentHeight) >> 2));
+
+            for (uint16_t histogramBin = 0; histogramBin < HISTOGRAM_NUMBER_OF_BINS; histogramBin++) {
+                curFrame->m_lowres.picHistogram[segmentInFrameWidthIndex][segmentInFrameHeightIndex][1][histogramBin] =
+                    curFrame->m_lowres.picHistogram[segmentInFrameWidthIndex][segmentInFrameHeightIndex][1][histogramBin] << dsFactor;
+            }
+
+            // V Histogram
+            calculateHistogram(
+                curFrame->m_fencPic->m_picOrg[2] + ((segmentInFrameWidthIndex * segmentWidth) >> 1) + (((segmentInFrameHeightIndex * segmentHeight) >> 1) * curFrame->m_fencPic->m_strideC),
+                curSegmentWidth >> 1,
+                curSegmentHeight >> 1,
+                curFrame->m_fencPic->m_strideC,
+                dsFactor,
+                curFrame->m_lowres.picHistogram[segmentInFrameWidthIndex][segmentInFrameHeightIndex][2],
+                &sum);
+
+            sum = (sum << dsFactor); // same x16 scaling as for Cb
+            *sumAverageIntensityCr += sum;
+            curFrame->m_lowres.averageIntensityPerSegment[segmentInFrameWidthIndex][segmentInFrameHeightIndex][2] =
+                (uint8_t)((sum + ((curSegmentWidth * curSegmentHeight) >> 3)) / ((curSegmentWidth * curSegmentHeight) >> 2));
+
+            for (uint16_t histogramBin = 0; histogramBin < HISTOGRAM_NUMBER_OF_BINS; histogramBin++) {
+                curFrame->m_lowres.picHistogram[segmentInFrameWidthIndex][segmentInFrameHeightIndex][2][histogramBin] =
+                    curFrame->m_lowres.picHistogram[segmentInFrameWidthIndex][segmentInFrameHeightIndex][2][histogramBin] << dsFactor;
+            }
+        }
+    }
+    return;
+
+}
+
+/* Build the luma histogram and average intensity of every picture segment from the quarter-sampled
+*  lowres buffer, and add 16x each segment's pixel sum to *sumAvgIntensityTotalSegmentsLuma (the
+*  accumulator is not reset here). Histogram bins are scaled by 16 as well. */
+void LookaheadTLD::computeIntensityHistogramBinsLuma(
+    Frame    *curFrame,
+    uint64_t *sumAvgIntensityTotalSegmentsLuma)
+{
+    uint64_t sum;
+
+    uint32_t segmentWidth = curFrame->m_lowres.quarterSampleLowResWidth / NUMBER_OF_SEGMENTS_IN_WIDTH;
+    uint32_t segmentHeight = curFrame->m_lowres.quarterSampleLowResHeight / NUMBER_OF_SEGMENTS_IN_HEIGHT;
+
+    for (uint32_t segmentInFrameWidthIndex = 0; segmentInFrameWidthIndex < NUMBER_OF_SEGMENTS_IN_WIDTH; segmentInFrameWidthIndex++)
+    {
+        for (uint32_t segmentInFrameHeightIndex = 0; segmentInFrameHeightIndex < NUMBER_OF_SEGMENTS_IN_HEIGHT; segmentInFrameHeightIndex++)
+        {
+            // Initialize bins to 1
+            for (uint32_t cuIndex = 0; cuIndex < 256; cuIndex++) {
+                curFrame->m_lowres.picHistogram[segmentInFrameWidthIndex][segmentInFrameHeightIndex][0][cuIndex] = 1;
+            }
+
+            uint32_t segmentWidthOffset = (segmentInFrameWidthIndex == NUMBER_OF_SEGMENTS_IN_WIDTH - 1) ?
+                curFrame->m_lowres.quarterSampleLowResWidth - (NUMBER_OF_SEGMENTS_IN_WIDTH * segmentWidth) : 0;
+            uint32_t segmentHeightOffset = (segmentInFrameHeightIndex == NUMBER_OF_SEGMENTS_IN_HEIGHT - 1) ?
+                curFrame->m_lowres.quarterSampleLowResHeight - (NUMBER_OF_SEGMENTS_IN_HEIGHT * segmentHeight) : 0;
+            const uint32_t curSegmentWidth = segmentWidth + segmentWidthOffset;    // last column absorbs the remainder
+            const uint32_t curSegmentHeight = segmentHeight + segmentHeightOffset; // last row absorbs the remainder
+            // Y Histogram
+            calculateHistogram(
+                curFrame->m_lowres.quarterSampleLowResBuffer + (curFrame->m_lowres.quarterSampleLowResOriginX + segmentInFrameWidthIndex * segmentWidth) + ((curFrame->m_lowres.quarterSampleLowResOriginY + segmentInFrameHeightIndex * segmentHeight) * curFrame->m_lowres.quarterSampleLowResStrideY),
+                curSegmentWidth,
+                curSegmentHeight,
+                curFrame->m_lowres.quarterSampleLowResStrideY,
+                1,
+                curFrame->m_lowres.picHistogram[segmentInFrameWidthIndex][segmentInFrameHeightIndex][0],
+                &sum);
+
+            curFrame->m_lowres.averageIntensityPerSegment[segmentInFrameWidthIndex][segmentInFrameHeightIndex][0] = (uint8_t)((sum + ((curSegmentWidth * curSegmentHeight) >> 1)) / (curSegmentWidth * curSegmentHeight));
+            (*sumAvgIntensityTotalSegmentsLuma) += (sum << 4);
+            for (uint32_t histogramBin = 0; histogramBin < HISTOGRAM_NUMBER_OF_BINS; histogramBin++)
+            {
+                curFrame->m_lowres.picHistogram[segmentInFrameWidthIndex][segmentInFrameHeightIndex][0][histogramBin] =
+                    curFrame->m_lowres.picHistogram[segmentInFrameWidthIndex][segmentInFrameHeightIndex][0][histogramBin] << 4;
+            }
+        }
+    }
+}
+
+void LookaheadTLD::collectPictureStatistics(Frame *curFrame)
+{
+    Lowres& lowres = curFrame->m_lowres;
+    // full-resolution sample count; the chroma averages divide by a quarter of it
+    const uint32_t lumaArea = lowres.widthFullRes * lowres.heightFullRes;
+    const uint32_t chromaArea = lumaArea >> 2;
+
+    // per-plane totals, filled in by the two histogram passes below
+    uint64_t lumaTotal = 0;
+    uint64_t cbTotal = 0;
+    uint64_t crTotal = 0;
+
+    // luma first, then both chroma planes
+    computeIntensityHistogramBinsLuma(curFrame, &lumaTotal);
+    computeIntensityHistogramBinsChroma(curFrame, &cbTotal, &crTotal);
+
+    // rounded averages: half of the divisor is added before dividing
+    lowres.averageIntensity[0] = (uint8_t)((lumaTotal + (lumaArea >> 1)) / lumaArea);
+    lowres.averageIntensity[1] = (uint8_t)((cbTotal + (lumaArea >> 3)) / chromaArea);
+    lowres.averageIntensity[2] = (uint8_t)((crTotal + (lumaArea >> 3)) / chromaArea);
+
+    computePictureStatistics(curFrame);
+
+    // clear the "analyzed" flag for this frame
+    lowres.bHistScenecutAnalyzed = false;
+}
 
 void PreLookaheadGroup::processTasks(int workerThreadID)
 {
@@ -1462,6 +1778,10 @@ void PreLookaheadGroup::processTasks(int workerThreadID)
 
         if (m_lookahead.m_bAdaptiveQuant)
             tld.calcAdaptiveQuantFrame(preFrame, m_lookahead.m_param);
+
+        if (m_lookahead.m_param->bHistBasedSceneCut)
+            tld.collectPictureStatistics(preFrame);
+
         tld.lowresIntraEstimate(preFrame->m_lowres, m_lookahead.m_param->rc.qgSize);
         preFrame->m_lowresInit = true;
 
diff --git a/source/encoder/slicetype.h b/source/encoder/slicetype.h
index 490e7ba1a..442063cf4 100644
--- a/source/encoder/slicetype.h
+++ b/source/encoder/slicetype.h
@@ -44,6 +44,24 @@ class Lookahead;
 #define EDGE_INCLINATION 45
 #define TEMPORAL_SCENECUT_THRESHOLD 50
 
+#define X265_ABS(a)                        (((a) < 0) ? (-(a)) : (a))
+/* NOTE: X265_ABS evaluates its argument more than once; do not pass expressions with side effects */
+#define PICTURE_DIFF_VARIANCE_TH            390
+#define PICTURE_VARIANCE_TH                 1500
+#define LOW_VAR_SCENE_CHANGE_TH             2250
+#define HIGH_VAR_SCENE_CHANGE_TH            3500
+/* Chroma counterparts; the two scene-change values are a quarter of the luma ones (integer division) */
+#define PICTURE_DIFF_VARIANCE_CHROMA_TH     10
+#define PICTURE_VARIANCE_CHROMA_TH          20
+#define LOW_VAR_SCENE_CHANGE_CHROMA_TH      (2250/4)
+#define HIGH_VAR_SCENE_CHANGE_CHROMA_TH     (3500/4)
+
+#define FLASH_TH                            1.5
+#define FADE_TH                             4
+#define INTENSITY_CHANGE_TH                 4
+/* (w*h) shifted right by 2*MAX_LOG2_CU_SIZE; arguments are parenthesised so expressions can be passed */
+#define NUM64x64INPIC(w,h)                  (((w)*(h))>> (MAX_LOG2_CU_SIZE<<1))
+
 #if HIGH_BIT_DEPTH
 #define EDGE_THRESHOLD 1023.0
 #else
@@ -93,6 +111,27 @@ struct LookaheadTLD
 
     ~LookaheadTLD() { X265_FREE(wbuffer[0]); }
 
+    void collectPictureStatistics(Frame *curFrame);
+    void computeIntensityHistogramBinsLuma(Frame *curFrame, uint64_t *sumAvgIntensityTotalSegmentsLuma);
+
+    void computeIntensityHistogramBinsChroma(
+        Frame    *curFrame,
+        uint64_t *sumAverageIntensityCb,
+        uint64_t *sumAverageIntensityCr);
+
+    void calculateHistogram(
+        pixel    *inputSrc,
+        uint32_t  inputWidth,
+        uint32_t  inputHeight,
+        intptr_t  stride,
+        uint8_t   dsFactor,
+        uint32_t *histogram,
+        uint64_t *sum);
+
+    void computePictureStatistics(Frame *curFrame);
+
+    uint32_t calcVariance(pixel* src, intptr_t stride, intptr_t blockOffset, uint32_t plane);
+
     void calcAdaptiveQuantFrame(Frame *curFrame, x265_param* param);
     void lowresIntraEstimate(Lowres& fenc, uint32_t qgSize);
 
@@ -152,6 +191,14 @@ public:
     bool          m_isFadeIn;
     uint64_t      m_fadeCount;
     int           m_fadeStart;
+
+    uint32_t    **m_accHistDiffRunningAvgCb;
+    uint32_t    **m_accHistDiffRunningAvgCr;
+    uint32_t    **m_accHistDiffRunningAvg;
+
+    bool          m_resetRunningAvg;
+    uint32_t      m_segmentCountThreshold;
+
     Lookahead(x265_param *param, ThreadPool *pool);
 #if DETAILED_CU_STATS
     int64_t       m_slicetypeDecideElapsedTime;
@@ -184,6 +231,7 @@ protected:
     /* called by slicetypeAnalyse() to make slice decisions */
     bool    scenecut(Lowres **frames, int p0, int p1, bool bRealScenecut, int numFrames);
     bool    scenecutInternal(Lowres **frames, int p0, int p1, bool bRealScenecut);
+
     void    slicetypePath(Lowres **frames, int length, char(*best_paths)[X265_LOOKAHEAD_MAX + 1]);
     int64_t slicetypePathCost(Lowres **frames, char *path, int64_t threshold);
     int64_t vbvFrameCost(Lowres **frames, int p0, int p1, int b);
-- 
2.37.2.windows.2

