# HG changeset patch
# User Divya Manivannan <di...@multicorewareinc.com>
# Date 1500875370 -19800
#      Mon Jul 24 11:19:30 2017 +0530
# Node ID 7c66e75477380975a55f9ce38c6cb0b639f731c9
# Parent  2718cb5dd67f5e6735c9ee37df19c491f1473a60
Skip the lookahead when the analysis reuse mode is load, analysis file writing
is disabled, and a scale factor is enabled.

All of the lookahead data is passed from the analysis save mode to the load
mode, so the lookahead itself is skipped in load mode to improve performance.
The lookahead costs are approximated by scaling them with the scale factor.

diff -r 2718cb5dd67f -r 7c66e7547738 source/common/common.h
--- a/source/common/common.h    Tue Sep 05 11:21:56 2017 +0530
+++ b/source/common/common.h    Mon Jul 24 11:19:30 2017 +0530
@@ -207,7 +207,6 @@
 
 // arbitrary, but low because SATD scores are 1/4 normal
 #define X265_LOOKAHEAD_QP (12 + QP_BD_OFFSET)
-#define X265_LOOKAHEAD_MAX 250
 
 // Use the same size blocks as x264.  Using larger blocks seems to give 
artificially
 // high cost estimates (intra and inter both suffer)
diff -r 2718cb5dd67f -r 7c66e7547738 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp        Tue Sep 05 11:21:56 2017 +0530
+++ b/source/encoder/encoder.cpp        Mon Jul 24 11:19:30 2017 +0530
@@ -790,9 +790,22 @@
         {
             /* readAnalysisFile reads analysis data for the frame and 
allocates memory based on slicetype */
             readAnalysisFile(&inFrame->m_analysisData, inFrame->m_poc, pic_in);
+            inFrame->m_poc = inFrame->m_analysisData.poc;
             sliceType = inFrame->m_analysisData.sliceType;
             inFrame->m_lowres.bScenecut = !!inFrame->m_analysisData.bScenecut;
             inFrame->m_lowres.satdCost = inFrame->m_analysisData.satdCost;
+            if (!m_param->bUseAnalysisFile && m_param->scaleFactor)
+            {
+                inFrame->m_lowres.sliceType = sliceType;
+                inFrame->m_lowres.bKeyframe = 
!!inFrame->m_analysisData.lookahead.keyframe;
+                inFrame->m_lowres.bLastMiniGopBFrame = 
!!inFrame->m_analysisData.lookahead.lastMiniGopBFrame;
+                int vbvCount = m_param->lookaheadDepth + m_param->bframes + 2;
+                for (int index = 0; index < vbvCount; index++)
+                {
+                    inFrame->m_lowres.plannedSatd[index] = 
inFrame->m_analysisData.lookahead.plannedSatd[index];
+                    inFrame->m_lowres.plannedType[index] = 
inFrame->m_analysisData.lookahead.plannedType[index];
+                }
+            }
         }
         if (m_param->bUseRcStats && pic_in->rcData)
         {
@@ -886,12 +899,39 @@
                     pic_out->analysisData.poc = pic_out->poc;
                     pic_out->analysisData.sliceType = pic_out->sliceType;
                     pic_out->analysisData.bScenecut = 
outFrame->m_lowres.bScenecut;
-                    pic_out->analysisData.satdCost  = 
outFrame->m_lowres.satdCost;                    
+                    pic_out->analysisData.satdCost  = 
outFrame->m_lowres.satdCost;
                     pic_out->analysisData.numCUsInFrame = 
outFrame->m_analysisData.numCUsInFrame;
+                    pic_out->analysisData.numCuInHeight = 
outFrame->m_analysisData.numCuInHeight;
                     pic_out->analysisData.numPartitions = 
outFrame->m_analysisData.numPartitions;
                     pic_out->analysisData.wt = outFrame->m_analysisData.wt;
                     pic_out->analysisData.interData = 
outFrame->m_analysisData.interData;
                     pic_out->analysisData.intraData = 
outFrame->m_analysisData.intraData;
+                    if (!m_param->bUseAnalysisFile && m_param->scaleFactor)
+                    {
+                        pic_out->analysisData.satdCost *= m_param->scaleFactor 
* 2;
+                        pic_out->analysisData.lookahead.keyframe = 
outFrame->m_lowres.bKeyframe;
+                        pic_out->analysisData.lookahead.lastMiniGopBFrame = 
outFrame->m_lowres.bLastMiniGopBFrame;
+                        int vbvCount = m_param->lookaheadDepth + 
m_param->bframes + 2;
+                        for (int index = 0; index < vbvCount; index++)
+                        {
+                            pic_out->analysisData.lookahead.plannedSatd[index] 
= outFrame->m_lowres.plannedSatd[index] * m_param->scaleFactor * 2;
+                            pic_out->analysisData.lookahead.plannedType[index] 
= outFrame->m_lowres.plannedType[index];
+                        }
+                        for (uint32_t index = 0; index < 
pic_out->analysisData.numCuInHeight; index++)
+                        {
+                            
outFrame->m_analysisData.lookahead.intraSatdForVbv[index] = 
outFrame->m_encData->m_rowStat[index].intraSatdForVbv * m_param->scaleFactor * 
2;
+                            
outFrame->m_analysisData.lookahead.satdForVbv[index] = 
outFrame->m_encData->m_rowStat[index].satdForVbv * m_param->scaleFactor * 2;
+                        }
+                        pic_out->analysisData.lookahead.intraSatdForVbv = 
outFrame->m_analysisData.lookahead.intraSatdForVbv;
+                        pic_out->analysisData.lookahead.satdForVbv = 
outFrame->m_analysisData.lookahead.satdForVbv;
+                        for (uint32_t index = 0; index < 
pic_out->analysisData.numCUsInFrame; index++)
+                        {
+                            
outFrame->m_analysisData.lookahead.intraVbvCost[index] = 
outFrame->m_encData->m_cuStat[index].intraVbvCost * m_param->scaleFactor * 2;
+                            outFrame->m_analysisData.lookahead.vbvCost[index] 
= outFrame->m_encData->m_cuStat[index].vbvCost * m_param->scaleFactor * 2;
+                        }
+                        pic_out->analysisData.lookahead.intraVbvCost = 
outFrame->m_analysisData.lookahead.intraVbvCost;
+                        pic_out->analysisData.lookahead.vbvCost = 
outFrame->m_analysisData.lookahead.vbvCost;
+                    }
                     writeAnalysisFile(&pic_out->analysisData, 
*outFrame->m_encData);
                     if (m_param->bUseAnalysisFile)
                         freeAnalysis(&pic_out->analysisData);
@@ -1054,7 +1094,19 @@
                 slice->m_maxNumMergeCand = m_param->maxNumMergeCand;
                 slice->m_endCUAddr = slice->realEndAddress(m_sps.numCUsInFrame 
* m_param->num4x4Partitions);
             }
-
+            if (m_param->analysisReuseMode == X265_ANALYSIS_LOAD && 
!m_param->bUseAnalysisFile && m_param->scaleFactor)
+            {
+                for (uint32_t index = 0; index < 
frameEnc->m_analysisData.numCuInHeight; index++)
+                {
+                    frameEnc->m_encData->m_rowStat[index].intraSatdForVbv = 
frameEnc->m_analysisData.lookahead.intraSatdForVbv[index];
+                    frameEnc->m_encData->m_rowStat[index].satdForVbv = 
frameEnc->m_analysisData.lookahead.satdForVbv[index];
+                }
+                for (uint32_t index = 0; index < 
frameEnc->m_analysisData.numCUsInFrame; index++)
+                {
+                    frameEnc->m_encData->m_cuStat[index].intraVbvCost = 
frameEnc->m_analysisData.lookahead.intraVbvCost[index];
+                    frameEnc->m_encData->m_cuStat[index].vbvCost = 
frameEnc->m_analysisData.lookahead.vbvCost[index];
+                }
+            }
             if (m_param->searchMethod == X265_SEA && 
frameEnc->m_lowres.sliceType != X265_TYPE_B)
             {
                 int padX = m_param->maxCUSize + 32;
@@ -1129,6 +1181,7 @@
 
                 uint32_t numCUsInFrame   = widthInCU * heightInCU;
                 analysis->numCUsInFrame  = numCUsInFrame;
+                analysis->numCuInHeight = heightInCU;
                 analysis->numPartitions  = m_param->num4x4Partitions;
                 allocAnalysis(analysis);
             }
@@ -2705,6 +2758,13 @@
 {
     X265_CHECK(analysis->sliceType, "invalid slice type\n");
     analysis->interData = analysis->intraData = NULL;
+    if (!m_param->bUseAnalysisFile && m_param->scaleFactor)
+    {
+        CHECKED_MALLOC_ZERO(analysis->lookahead.intraSatdForVbv, uint32_t, 
analysis->numCuInHeight);
+        CHECKED_MALLOC_ZERO(analysis->lookahead.satdForVbv, uint32_t, 
analysis->numCuInHeight);
+        CHECKED_MALLOC_ZERO(analysis->lookahead.intraVbvCost, uint32_t, 
analysis->numCUsInFrame);
+        CHECKED_MALLOC_ZERO(analysis->lookahead.vbvCost, uint32_t, 
analysis->numCUsInFrame);
+    }
     if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == 
X265_TYPE_I)
     {
         if (m_param->analysisReuseLevel < 2)
@@ -2770,6 +2830,13 @@
 
 void Encoder::freeAnalysis(x265_analysis_data* analysis)
 {
+    if (!m_param->bUseAnalysisFile && m_param->scaleFactor)
+    {
+        X265_FREE(analysis->lookahead.satdForVbv);
+        X265_FREE(analysis->lookahead.intraSatdForVbv);
+        X265_FREE(analysis->lookahead.vbvCost);
+        X265_FREE(analysis->lookahead.intraVbvCost);
+    }
     /* Early exit freeing weights alone if level is 1 (when there is no 
analysis inter/intra) */
     if (analysis->sliceType > X265_TYPE_I && analysis->wt)
         X265_FREE(analysis->wt);
@@ -2949,7 +3016,10 @@
     X265_FREAD(&analysis->bScenecut, sizeof(int), 1, m_analysisFile, 
&(picData->bScenecut));
     X265_FREAD(&analysis->satdCost, sizeof(int64_t), 1, m_analysisFile, 
&(picData->satdCost));
     X265_FREAD(&analysis->numCUsInFrame, sizeof(int), 1, m_analysisFile, 
&(picData->numCUsInFrame));
+    X265_FREAD(&analysis->numCuInHeight, sizeof(uint32_t), 1, m_analysisFile, 
&(picData->numCuInHeight));
     X265_FREAD(&analysis->numPartitions, sizeof(int), 1, m_analysisFile, 
&(picData->numPartitions));
+    if (!m_param->bUseAnalysisFile && m_param->scaleFactor)
+        X265_FREAD(&analysis->lookahead, sizeof(x265_lookahead_data), 1, 
m_analysisFile, &(picData->lookahead));
     int scaledNumPartition = analysis->numPartitions;
     int factor = 1 << m_param->scaleFactor;
 
@@ -2958,7 +3028,13 @@
 
     /* Memory is allocated for inter and intra analysis data based on the 
slicetype */
     allocAnalysis(analysis);
-
+    if (!m_param->bUseAnalysisFile && m_param->scaleFactor)
+    {
+        X265_FREAD(analysis->lookahead.intraVbvCost, sizeof(uint32_t), 
analysis->numCUsInFrame, m_analysisFile, picData->lookahead.intraVbvCost);
+        X265_FREAD(analysis->lookahead.vbvCost, sizeof(uint32_t), 
analysis->numCUsInFrame, m_analysisFile, picData->lookahead.vbvCost);
+        X265_FREAD(analysis->lookahead.satdForVbv, sizeof(uint32_t), 
analysis->numCuInHeight, m_analysisFile, picData->lookahead.satdForVbv);
+        X265_FREAD(analysis->lookahead.intraSatdForVbv, sizeof(uint32_t), 
analysis->numCuInHeight, m_analysisFile, picData->lookahead.intraSatdForVbv);
+    }
     if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == 
X265_TYPE_I)
     {
         if (m_param->analysisReuseLevel < 2)
diff -r 2718cb5dd67f -r 7c66e7547738 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp   Tue Sep 05 11:21:56 2017 +0530
+++ b/source/encoder/frameencoder.cpp   Mon Jul 24 11:19:30 2017 +0530
@@ -1376,17 +1376,19 @@
             /* TODO: use defines from slicetype.h for lowres block size */
             uint32_t block_y = (ctu->m_cuPelY >> m_param->maxLog2CUSize) * 
noOfBlocks;
             uint32_t block_x = (ctu->m_cuPelX >> m_param->maxLog2CUSize) * 
noOfBlocks;
-            
-            cuStat.vbvCost = 0;
-            cuStat.intraVbvCost = 0;
-            for (uint32_t h = 0; h < noOfBlocks && block_y < maxBlockRows; 
h++, block_y++)
+            if (m_param->analysisReuseMode != X265_ANALYSIS_LOAD || 
m_param->bUseAnalysisFile || !m_param->scaleFactor)
             {
-                uint32_t idx = block_x + (block_y * maxBlockCols);
+                cuStat.vbvCost = 0;
+                cuStat.intraVbvCost = 0;
+                for (uint32_t h = 0; h < noOfBlocks && block_y < maxBlockRows; 
h++, block_y++)
+                {
+                    uint32_t idx = block_x + (block_y * maxBlockCols);
 
-                for (uint32_t w = 0; w < noOfBlocks && (block_x + w) < 
maxBlockCols; w++, idx++)
-                {
-                    cuStat.vbvCost += m_frame->m_lowres.lowresCostForRc[idx] & 
LOWRES_COST_MASK;
-                    cuStat.intraVbvCost += m_frame->m_lowres.intraCost[idx];
+                    for (uint32_t w = 0; w < noOfBlocks && (block_x + w) < 
maxBlockCols; w++, idx++)
+                    {
+                        cuStat.vbvCost += 
m_frame->m_lowres.lowresCostForRc[idx] & LOWRES_COST_MASK;
+                        cuStat.intraVbvCost += 
m_frame->m_lowres.intraCost[idx];
+                    }
                 }
             }
         }
diff -r 2718cb5dd67f -r 7c66e7547738 source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp      Tue Sep 05 11:21:56 2017 +0530
+++ b/source/encoder/slicetype.cpp      Mon Jul 24 11:19:30 2017 +0530
@@ -742,9 +742,21 @@
 /* Called by API thread */
 void Lookahead::addPicture(Frame& curFrame, int sliceType)
 {
-    checkLookaheadQueue(m_inputCount);
-    curFrame.m_lowres.sliceType = sliceType;
-    addPicture(curFrame);
+    if (m_param->analysisReuseMode == X265_ANALYSIS_LOAD && 
!m_param->bUseAnalysisFile && m_param->scaleFactor)
+    {
+        if (!m_filled)
+            m_filled = true;
+        m_outputLock.acquire();
+        m_outputQueue.pushBack(curFrame);
+        m_outputLock.release();
+        m_inputCount++;
+    }
+    else
+    {
+        checkLookaheadQueue(m_inputCount);
+        curFrame.m_lowres.sliceType = sliceType;
+        addPicture(curFrame);
+    }
 }
 
 void Lookahead::addPicture(Frame& curFrame)
@@ -831,6 +843,9 @@
             return out;
         }
 
+        if (m_param->analysisReuseMode == X265_ANALYSIS_LOAD && 
!m_param->bUseAnalysisFile && m_param->scaleFactor)
+            return NULL;
+
         findJob(-1); /* run slicetypeDecide() if necessary */
 
         m_inputLock.acquire();
@@ -887,68 +902,68 @@
     default:
         return;
     }
+    if (m_param->analysisReuseMode != X265_ANALYSIS_LOAD || 
m_param->bUseAnalysisFile || !m_param->scaleFactor)
+    {
+        X265_CHECK(curFrame->m_lowres.costEst[b - p0][p1 - b] > 0, "Slice cost 
not estimated\n")
+        if (m_param->rc.cuTree && !m_param->rc.bStatRead)
+            /* update row satds based on cutree offsets */
+            curFrame->m_lowres.satdCost = frameCostRecalculate(frames, p0, p1, 
b);
+        else if (m_param->analysisReuseMode != X265_ANALYSIS_LOAD)
+        {
+            if (m_param->rc.aqMode)
+                curFrame->m_lowres.satdCost = curFrame->m_lowres.costEstAq[b - 
p0][p1 - b];
+            else
+                curFrame->m_lowres.satdCost = curFrame->m_lowres.costEst[b - 
p0][p1 - b];
+        }
+        if (m_param->rc.vbvBufferSize && m_param->rc.vbvMaxBitrate)
+        {
+            /* aggregate lowres row satds to CTU resolution */
+            curFrame->m_lowres.lowresCostForRc = 
curFrame->m_lowres.lowresCosts[b - p0][p1 - b];
+            uint32_t lowresRow = 0, lowresCol = 0, lowresCuIdx = 0, sum = 0, 
intraSum = 0;
+            uint32_t scale = m_param->maxCUSize / (2 * X265_LOWRES_CU_SIZE);
+            uint32_t numCuInHeight = (m_param->sourceHeight + 
m_param->maxCUSize - 1) / m_param->maxCUSize;
+            uint32_t widthInLowresCu = (uint32_t)m_8x8Width, heightInLowresCu 
= (uint32_t)m_8x8Height;
+            double *qp_offset = 0;
+            /* Factor in qpoffsets based on Aq/Cutree in CU costs */
+            if (m_param->rc.aqMode || m_param->bAQMotion)
+                qp_offset = (frames[b]->sliceType == X265_TYPE_B || 
!m_param->rc.cuTree) ? frames[b]->qpAqOffset : frames[b]->qpCuTreeOffset;
 
-    X265_CHECK(curFrame->m_lowres.costEst[b - p0][p1 - b] > 0, "Slice cost not 
estimated\n")
-
-    if (m_param->rc.cuTree && !m_param->rc.bStatRead)
-        /* update row satds based on cutree offsets */
-        curFrame->m_lowres.satdCost = frameCostRecalculate(frames, p0, p1, b);
-    else if (m_param->analysisReuseMode != X265_ANALYSIS_LOAD || 
m_param->scaleFactor)
-    {
-        if (m_param->rc.aqMode)
-            curFrame->m_lowres.satdCost = curFrame->m_lowres.costEstAq[b - 
p0][p1 - b];
-        else
-            curFrame->m_lowres.satdCost = curFrame->m_lowres.costEst[b - 
p0][p1 - b];
-    }
-
-    if (m_param->rc.vbvBufferSize && m_param->rc.vbvMaxBitrate)
-    {
-        /* aggregate lowres row satds to CTU resolution */
-        curFrame->m_lowres.lowresCostForRc = curFrame->m_lowres.lowresCosts[b 
- p0][p1 - b];
-        uint32_t lowresRow = 0, lowresCol = 0, lowresCuIdx = 0, sum = 0, 
intraSum = 0;
-        uint32_t scale = m_param->maxCUSize / (2 * X265_LOWRES_CU_SIZE);
-        uint32_t numCuInHeight = (m_param->sourceHeight + m_param->maxCUSize - 
1) / m_param->maxCUSize;
-        uint32_t widthInLowresCu = (uint32_t)m_8x8Width, heightInLowresCu = 
(uint32_t)m_8x8Height;
-        double *qp_offset = 0;
-        /* Factor in qpoffsets based on Aq/Cutree in CU costs */
-        if (m_param->rc.aqMode || m_param->bAQMotion)
-            qp_offset = (frames[b]->sliceType == X265_TYPE_B || 
!m_param->rc.cuTree) ? frames[b]->qpAqOffset : frames[b]->qpCuTreeOffset;
-
-        for (uint32_t row = 0; row < numCuInHeight; row++)
-        {
-            lowresRow = row * scale;
-            for (uint32_t cnt = 0; cnt < scale && lowresRow < 
heightInLowresCu; lowresRow++, cnt++)
+            for (uint32_t row = 0; row < numCuInHeight; row++)
             {
-                sum = 0; intraSum = 0;
-                int diff = 0;
-                lowresCuIdx = lowresRow * widthInLowresCu;
-                for (lowresCol = 0; lowresCol < widthInLowresCu; lowresCol++, 
lowresCuIdx++)
+                lowresRow = row * scale;
+                for (uint32_t cnt = 0; cnt < scale && lowresRow < 
heightInLowresCu; lowresRow++, cnt++)
                 {
-                    uint16_t lowresCuCost = 
curFrame->m_lowres.lowresCostForRc[lowresCuIdx] & LOWRES_COST_MASK;
-                    if (qp_offset)
+                    sum = 0; intraSum = 0;
+                    int diff = 0;
+                    lowresCuIdx = lowresRow * widthInLowresCu;
+                    for (lowresCol = 0; lowresCol < widthInLowresCu; 
lowresCol++, lowresCuIdx++)
                     {
-                        double qpOffset;
-                        if (m_param->rc.qgSize == 8)
-                            qpOffset = (qp_offset[lowresCol * 2 + lowresRow * 
widthInLowresCu * 4] +
-                                        qp_offset[lowresCol * 2 + lowresRow * 
widthInLowresCu * 4 + 1] +
-                                        qp_offset[lowresCol * 2 + lowresRow * 
widthInLowresCu * 4 + curFrame->m_lowres.maxBlocksInRowFullRes] +
-                                        qp_offset[lowresCol * 2 + lowresRow * 
widthInLowresCu * 4 + curFrame->m_lowres.maxBlocksInRowFullRes + 1]) / 4;
-                        else
-                            qpOffset = qp_offset[lowresCuIdx];
-                        lowresCuCost = (uint16_t)((lowresCuCost * 
x265_exp2fix8(qpOffset) + 128) >> 8);
-                        int32_t intraCuCost = 
curFrame->m_lowres.intraCost[lowresCuIdx];
-                        curFrame->m_lowres.intraCost[lowresCuIdx] = 
(intraCuCost * x265_exp2fix8(qpOffset) + 128) >> 8;
+                        uint16_t lowresCuCost = 
curFrame->m_lowres.lowresCostForRc[lowresCuIdx] & LOWRES_COST_MASK;
+                        if (qp_offset)
+                        {
+                            double qpOffset;
+                            if (m_param->rc.qgSize == 8)
+                                qpOffset = (qp_offset[lowresCol * 2 + 
lowresRow * widthInLowresCu * 4] +
+                                qp_offset[lowresCol * 2 + lowresRow * 
widthInLowresCu * 4 + 1] +
+                                qp_offset[lowresCol * 2 + lowresRow * 
widthInLowresCu * 4 + curFrame->m_lowres.maxBlocksInRowFullRes] +
+                                qp_offset[lowresCol * 2 + lowresRow * 
widthInLowresCu * 4 + curFrame->m_lowres.maxBlocksInRowFullRes + 1]) / 4;
+                            else
+                                qpOffset = qp_offset[lowresCuIdx];
+                            lowresCuCost = (uint16_t)((lowresCuCost * 
x265_exp2fix8(qpOffset) + 128) >> 8);
+                            int32_t intraCuCost = 
curFrame->m_lowres.intraCost[lowresCuIdx];
+                            curFrame->m_lowres.intraCost[lowresCuIdx] = 
(intraCuCost * x265_exp2fix8(qpOffset) + 128) >> 8;
+                        }
+                        if (m_param->bIntraRefresh && slice->m_sliceType == 
X265_TYPE_P)
+                            for (uint32_t x = 
curFrame->m_encData->m_pir.pirStartCol; x <= 
curFrame->m_encData->m_pir.pirEndCol; x++)
+                                diff += 
curFrame->m_lowres.intraCost[lowresCuIdx] - lowresCuCost;
+                        curFrame->m_lowres.lowresCostForRc[lowresCuIdx] = 
lowresCuCost;
+                        sum += lowresCuCost;
+                        intraSum += curFrame->m_lowres.intraCost[lowresCuIdx];
                     }
-                    if (m_param->bIntraRefresh && slice->m_sliceType == 
X265_TYPE_P)
-                        for (uint32_t x = 
curFrame->m_encData->m_pir.pirStartCol; x <= 
curFrame->m_encData->m_pir.pirEndCol; x++)
-                            diff += curFrame->m_lowres.intraCost[lowresCuIdx] 
- lowresCuCost;
-                    curFrame->m_lowres.lowresCostForRc[lowresCuIdx] = 
lowresCuCost;
-                    sum += lowresCuCost;
-                    intraSum += curFrame->m_lowres.intraCost[lowresCuIdx];
+                    curFrame->m_encData->m_rowStat[row].satdForVbv += sum;
+                    curFrame->m_encData->m_rowStat[row].satdForVbv += diff;
+                    curFrame->m_encData->m_rowStat[row].intraSatdForVbv += 
intraSum;
                 }
-                curFrame->m_encData->m_rowStat[row].satdForVbv += sum;
-                curFrame->m_encData->m_rowStat[row].satdForVbv += diff;
-                curFrame->m_encData->m_rowStat[row].intraSatdForVbv += 
intraSum;
             }
         }
     }
diff -r 2718cb5dd67f -r 7c66e7547738 source/x265.h
--- a/source/x265.h     Tue Sep 05 11:21:56 2017 +0530
+++ b/source/x265.h     Mon Jul 24 11:19:30 2017 +0530
@@ -88,6 +88,20 @@
     uint8_t* payload;
 } x265_nal;
 
+#define X265_LOOKAHEAD_MAX 250
+
+typedef struct x265_lookahead_data /* lookahead results carried per frame in x265_analysis_data.lookahead */
+{
+    int64_t   plannedSatd[X265_LOOKAHEAD_MAX + 1]; /* saved from / loaded into m_lowres.plannedSatd[] */
+    uint32_t  *vbvCost;                            /* numCUsInFrame entries; m_cuStat[].vbvCost */
+    uint32_t  *intraVbvCost;                       /* numCUsInFrame entries; m_cuStat[].intraVbvCost */
+    uint32_t  *satdForVbv;                         /* numCuInHeight entries; m_rowStat[].satdForVbv */
+    uint32_t  *intraSatdForVbv;                    /* numCuInHeight entries; m_rowStat[].intraSatdForVbv */
+    int       keyframe;                            /* saved from / loaded into m_lowres.bKeyframe */
+    int       lastMiniGopBFrame;                   /* saved from / loaded into m_lowres.bLastMiniGopBFrame */
+    int       plannedType[X265_LOOKAHEAD_MAX + 1]; /* saved from / loaded into m_lowres.plannedType[] */
+} x265_lookahead_data;
+
 /* Stores all analysis data for a single frame */
 typedef struct x265_analysis_data
 {
@@ -102,6 +116,8 @@
     void*            wt;
     void*            interData;
     void*            intraData;
+    uint32_t         numCuInHeight;
+    x265_lookahead_data lookahead;
 } x265_analysis_data;
 
 /* cu statistics */
# HG changeset patch
# User Divya Manivannan <di...@multicorewareinc.com>
# Date 1500875370 -19800
#      Mon Jul 24 11:19:30 2017 +0530
# Node ID 7c66e75477380975a55f9ce38c6cb0b639f731c9
# Parent  2718cb5dd67f5e6735c9ee37df19c491f1473a60
Skip the lookahead when the analysis reuse mode is load, analysis file writing
is disabled, and a scale factor is enabled.

All of the lookahead data is passed from the analysis save mode to the load
mode, so the lookahead itself is skipped in load mode to improve performance.
The lookahead costs are approximated by scaling them with the scale factor.

diff -r 2718cb5dd67f -r 7c66e7547738 source/common/common.h
--- a/source/common/common.h	Tue Sep 05 11:21:56 2017 +0530
+++ b/source/common/common.h	Mon Jul 24 11:19:30 2017 +0530
@@ -207,7 +207,6 @@
 
 // arbitrary, but low because SATD scores are 1/4 normal
 #define X265_LOOKAHEAD_QP (12 + QP_BD_OFFSET)
-#define X265_LOOKAHEAD_MAX 250
 
 // Use the same size blocks as x264.  Using larger blocks seems to give artificially
 // high cost estimates (intra and inter both suffer)
diff -r 2718cb5dd67f -r 7c66e7547738 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Tue Sep 05 11:21:56 2017 +0530
+++ b/source/encoder/encoder.cpp	Mon Jul 24 11:19:30 2017 +0530
@@ -790,9 +790,22 @@
         {
             /* readAnalysisFile reads analysis data for the frame and allocates memory based on slicetype */
             readAnalysisFile(&inFrame->m_analysisData, inFrame->m_poc, pic_in);
+            inFrame->m_poc = inFrame->m_analysisData.poc;
             sliceType = inFrame->m_analysisData.sliceType;
             inFrame->m_lowres.bScenecut = !!inFrame->m_analysisData.bScenecut;
             inFrame->m_lowres.satdCost = inFrame->m_analysisData.satdCost;
+            if (!m_param->bUseAnalysisFile && m_param->scaleFactor)
+            {
+                inFrame->m_lowres.sliceType = sliceType;
+                inFrame->m_lowres.bKeyframe = !!inFrame->m_analysisData.lookahead.keyframe;
+                inFrame->m_lowres.bLastMiniGopBFrame = !!inFrame->m_analysisData.lookahead.lastMiniGopBFrame;
+                int vbvCount = m_param->lookaheadDepth + m_param->bframes + 2;
+                for (int index = 0; index < vbvCount; index++)
+                {
+                    inFrame->m_lowres.plannedSatd[index] = inFrame->m_analysisData.lookahead.plannedSatd[index];
+                    inFrame->m_lowres.plannedType[index] = inFrame->m_analysisData.lookahead.plannedType[index];
+                }
+            }
         }
         if (m_param->bUseRcStats && pic_in->rcData)
         {
@@ -886,12 +899,39 @@
                     pic_out->analysisData.poc = pic_out->poc;
                     pic_out->analysisData.sliceType = pic_out->sliceType;
                     pic_out->analysisData.bScenecut = outFrame->m_lowres.bScenecut;
-                    pic_out->analysisData.satdCost  = outFrame->m_lowres.satdCost;                    
+                    pic_out->analysisData.satdCost  = outFrame->m_lowres.satdCost;
                     pic_out->analysisData.numCUsInFrame = outFrame->m_analysisData.numCUsInFrame;
+                    pic_out->analysisData.numCuInHeight = outFrame->m_analysisData.numCuInHeight;
                     pic_out->analysisData.numPartitions = outFrame->m_analysisData.numPartitions;
                     pic_out->analysisData.wt = outFrame->m_analysisData.wt;
                     pic_out->analysisData.interData = outFrame->m_analysisData.interData;
                     pic_out->analysisData.intraData = outFrame->m_analysisData.intraData;
+                    if (!m_param->bUseAnalysisFile && m_param->scaleFactor)
+                    {
+                        pic_out->analysisData.satdCost *= m_param->scaleFactor * 2;
+                        pic_out->analysisData.lookahead.keyframe = outFrame->m_lowres.bKeyframe;
+                        pic_out->analysisData.lookahead.lastMiniGopBFrame = outFrame->m_lowres.bLastMiniGopBFrame;
+                        int vbvCount = m_param->lookaheadDepth + m_param->bframes + 2;
+                        for (int index = 0; index < vbvCount; index++)
+                        {
+                            pic_out->analysisData.lookahead.plannedSatd[index] = outFrame->m_lowres.plannedSatd[index] * m_param->scaleFactor * 2;
+                            pic_out->analysisData.lookahead.plannedType[index] = outFrame->m_lowres.plannedType[index];
+                        }
+                        for (uint32_t index = 0; index < pic_out->analysisData.numCuInHeight; index++)
+                        {
+                            outFrame->m_analysisData.lookahead.intraSatdForVbv[index] = outFrame->m_encData->m_rowStat[index].intraSatdForVbv * m_param->scaleFactor * 2;
+                            outFrame->m_analysisData.lookahead.satdForVbv[index] = outFrame->m_encData->m_rowStat[index].satdForVbv * m_param->scaleFactor * 2;
+                        }
+                        pic_out->analysisData.lookahead.intraSatdForVbv = outFrame->m_analysisData.lookahead.intraSatdForVbv;
+                        pic_out->analysisData.lookahead.satdForVbv = outFrame->m_analysisData.lookahead.satdForVbv;
+                        for (uint32_t index = 0; index < pic_out->analysisData.numCUsInFrame; index++)
+                        {
+                            outFrame->m_analysisData.lookahead.intraVbvCost[index] = outFrame->m_encData->m_cuStat[index].intraVbvCost * m_param->scaleFactor * 2;
+                            outFrame->m_analysisData.lookahead.vbvCost[index] = outFrame->m_encData->m_cuStat[index].vbvCost * m_param->scaleFactor * 2;
+                        }
+                        pic_out->analysisData.lookahead.intraVbvCost = outFrame->m_analysisData.lookahead.intraVbvCost;
+                        pic_out->analysisData.lookahead.vbvCost = outFrame->m_analysisData.lookahead.vbvCost;
+                    }
                     writeAnalysisFile(&pic_out->analysisData, *outFrame->m_encData);
                     if (m_param->bUseAnalysisFile)
                         freeAnalysis(&pic_out->analysisData);
@@ -1054,7 +1094,19 @@
                 slice->m_maxNumMergeCand = m_param->maxNumMergeCand;
                 slice->m_endCUAddr = slice->realEndAddress(m_sps.numCUsInFrame * m_param->num4x4Partitions);
             }
-
+            if (m_param->analysisReuseMode == X265_ANALYSIS_LOAD && !m_param->bUseAnalysisFile && m_param->scaleFactor)
+            {
+                for (uint32_t index = 0; index < frameEnc->m_analysisData.numCuInHeight; index++)
+                {
+                    frameEnc->m_encData->m_rowStat[index].intraSatdForVbv = frameEnc->m_analysisData.lookahead.intraSatdForVbv[index];
+                    frameEnc->m_encData->m_rowStat[index].satdForVbv = frameEnc->m_analysisData.lookahead.satdForVbv[index];
+                }
+                for (uint32_t index = 0; index < frameEnc->m_analysisData.numCUsInFrame; index++)
+                {
+                    frameEnc->m_encData->m_cuStat[index].intraVbvCost = frameEnc->m_analysisData.lookahead.intraVbvCost[index];
+                    frameEnc->m_encData->m_cuStat[index].vbvCost = frameEnc->m_analysisData.lookahead.vbvCost[index];
+                }
+            }
             if (m_param->searchMethod == X265_SEA && frameEnc->m_lowres.sliceType != X265_TYPE_B)
             {
                 int padX = m_param->maxCUSize + 32;
@@ -1129,6 +1181,7 @@
 
                 uint32_t numCUsInFrame   = widthInCU * heightInCU;
                 analysis->numCUsInFrame  = numCUsInFrame;
+                analysis->numCuInHeight = heightInCU;
                 analysis->numPartitions  = m_param->num4x4Partitions;
                 allocAnalysis(analysis);
             }
@@ -2705,6 +2758,13 @@
 {
     X265_CHECK(analysis->sliceType, "invalid slice type\n");
     analysis->interData = analysis->intraData = NULL;
+    if (!m_param->bUseAnalysisFile && m_param->scaleFactor)
+    {
+        CHECKED_MALLOC_ZERO(analysis->lookahead.intraSatdForVbv, uint32_t, analysis->numCuInHeight);
+        CHECKED_MALLOC_ZERO(analysis->lookahead.satdForVbv, uint32_t, analysis->numCuInHeight);
+        CHECKED_MALLOC_ZERO(analysis->lookahead.intraVbvCost, uint32_t, analysis->numCUsInFrame);
+        CHECKED_MALLOC_ZERO(analysis->lookahead.vbvCost, uint32_t, analysis->numCUsInFrame);
+    }
     if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I)
     {
         if (m_param->analysisReuseLevel < 2)
@@ -2770,6 +2830,13 @@
 
 void Encoder::freeAnalysis(x265_analysis_data* analysis)
 {
+    if (!m_param->bUseAnalysisFile && m_param->scaleFactor)
+    {
+        X265_FREE(analysis->lookahead.satdForVbv);
+        X265_FREE(analysis->lookahead.intraSatdForVbv);
+        X265_FREE(analysis->lookahead.vbvCost);
+        X265_FREE(analysis->lookahead.intraVbvCost);
+    }
     /* Early exit freeing weights alone if level is 1 (when there is no analysis inter/intra) */
     if (analysis->sliceType > X265_TYPE_I && analysis->wt)
         X265_FREE(analysis->wt);
@@ -2949,7 +3016,10 @@
     X265_FREAD(&analysis->bScenecut, sizeof(int), 1, m_analysisFile, &(picData->bScenecut));
     X265_FREAD(&analysis->satdCost, sizeof(int64_t), 1, m_analysisFile, &(picData->satdCost));
     X265_FREAD(&analysis->numCUsInFrame, sizeof(int), 1, m_analysisFile, &(picData->numCUsInFrame));
+    X265_FREAD(&analysis->numCuInHeight, sizeof(uint32_t), 1, m_analysisFile, &(picData->numCuInHeight));
     X265_FREAD(&analysis->numPartitions, sizeof(int), 1, m_analysisFile, &(picData->numPartitions));
+    if (!m_param->bUseAnalysisFile && m_param->scaleFactor)
+        X265_FREAD(&analysis->lookahead, sizeof(x265_lookahead_data), 1, m_analysisFile, &(picData->lookahead));
     int scaledNumPartition = analysis->numPartitions;
     int factor = 1 << m_param->scaleFactor;
 
@@ -2958,7 +3028,13 @@
 
     /* Memory is allocated for inter and intra analysis data based on the slicetype */
     allocAnalysis(analysis);
-
+    if (!m_param->bUseAnalysisFile && m_param->scaleFactor)
+    {
+        X265_FREAD(analysis->lookahead.intraVbvCost, sizeof(uint32_t), analysis->numCUsInFrame, m_analysisFile, picData->lookahead.intraVbvCost);
+        X265_FREAD(analysis->lookahead.vbvCost, sizeof(uint32_t), analysis->numCUsInFrame, m_analysisFile, picData->lookahead.vbvCost);
+        X265_FREAD(analysis->lookahead.satdForVbv, sizeof(uint32_t), analysis->numCuInHeight, m_analysisFile, picData->lookahead.satdForVbv);
+        X265_FREAD(analysis->lookahead.intraSatdForVbv, sizeof(uint32_t), analysis->numCuInHeight, m_analysisFile, picData->lookahead.intraSatdForVbv);
+    }
     if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I)
     {
         if (m_param->analysisReuseLevel < 2)
diff -r 2718cb5dd67f -r 7c66e7547738 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Tue Sep 05 11:21:56 2017 +0530
+++ b/source/encoder/frameencoder.cpp	Mon Jul 24 11:19:30 2017 +0530
@@ -1376,17 +1376,19 @@
             /* TODO: use defines from slicetype.h for lowres block size */
             uint32_t block_y = (ctu->m_cuPelY >> m_param->maxLog2CUSize) * noOfBlocks;
             uint32_t block_x = (ctu->m_cuPelX >> m_param->maxLog2CUSize) * noOfBlocks;
-            
-            cuStat.vbvCost = 0;
-            cuStat.intraVbvCost = 0;
-            for (uint32_t h = 0; h < noOfBlocks && block_y < maxBlockRows; h++, block_y++)
+            if (m_param->analysisReuseMode != X265_ANALYSIS_LOAD || m_param->bUseAnalysisFile || !m_param->scaleFactor)
             {
-                uint32_t idx = block_x + (block_y * maxBlockCols);
+                cuStat.vbvCost = 0;
+                cuStat.intraVbvCost = 0;
+                for (uint32_t h = 0; h < noOfBlocks && block_y < maxBlockRows; h++, block_y++)
+                {
+                    uint32_t idx = block_x + (block_y * maxBlockCols);
 
-                for (uint32_t w = 0; w < noOfBlocks && (block_x + w) < maxBlockCols; w++, idx++)
-                {
-                    cuStat.vbvCost += m_frame->m_lowres.lowresCostForRc[idx] & LOWRES_COST_MASK;
-                    cuStat.intraVbvCost += m_frame->m_lowres.intraCost[idx];
+                    for (uint32_t w = 0; w < noOfBlocks && (block_x + w) < maxBlockCols; w++, idx++)
+                    {
+                        cuStat.vbvCost += m_frame->m_lowres.lowresCostForRc[idx] & LOWRES_COST_MASK;
+                        cuStat.intraVbvCost += m_frame->m_lowres.intraCost[idx];
+                    }
                 }
             }
         }
diff -r 2718cb5dd67f -r 7c66e7547738 source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp	Tue Sep 05 11:21:56 2017 +0530
+++ b/source/encoder/slicetype.cpp	Mon Jul 24 11:19:30 2017 +0530
@@ -742,9 +742,21 @@
 /* Called by API thread */
 void Lookahead::addPicture(Frame& curFrame, int sliceType)
 {
-    checkLookaheadQueue(m_inputCount);
-    curFrame.m_lowres.sliceType = sliceType;
-    addPicture(curFrame);
+    if (m_param->analysisReuseMode == X265_ANALYSIS_LOAD && !m_param->bUseAnalysisFile && m_param->scaleFactor)
+    { /* lookahead bypass: the frame goes straight onto the output queue; sliceType is not used on this path */
+        if (!m_filled) /* flag the lookahead as filled as soon as the first bypassed frame arrives */
+            m_filled = true;
+        m_outputLock.acquire(); /* the output queue push is done while holding m_outputLock */
+        m_outputQueue.pushBack(curFrame);
+        m_outputLock.release();
+        m_inputCount++; /* the bypassed frame is still counted as an input */
+    }
+    else /* every other configuration keeps the pre-patch sequence below */
+    {
+        checkLookaheadQueue(m_inputCount);
+        curFrame.m_lowres.sliceType = sliceType; /* record the caller-supplied slice type on the lowres frame */
+        addPicture(curFrame);
+    }
 }
 
 void Lookahead::addPicture(Frame& curFrame)
@@ -831,6 +843,9 @@
             return out;
         }
 
+        if (m_param->analysisReuseMode == X265_ANALYSIS_LOAD && !m_param->bUseAnalysisFile && m_param->scaleFactor)
+            return NULL;
+
         findJob(-1); /* run slicetypeDecide() if necessary */
 
         m_inputLock.acquire();
@@ -887,68 +902,68 @@
     default:
         return;
     }
+    if (m_param->analysisReuseMode != X265_ANALYSIS_LOAD || m_param->bUseAnalysisFile || !m_param->scaleFactor)
+    {
+        X265_CHECK(curFrame->m_lowres.costEst[b - p0][p1 - b] > 0, "Slice cost not estimated\n")
+        if (m_param->rc.cuTree && !m_param->rc.bStatRead)
+            /* update row satds based on cutree offsets */
+            curFrame->m_lowres.satdCost = frameCostRecalculate(frames, p0, p1, b);
+        else if (m_param->analysisReuseMode != X265_ANALYSIS_LOAD)
+        {
+            if (m_param->rc.aqMode)
+                curFrame->m_lowres.satdCost = curFrame->m_lowres.costEstAq[b - p0][p1 - b];
+            else
+                curFrame->m_lowres.satdCost = curFrame->m_lowres.costEst[b - p0][p1 - b];
+        }
+        if (m_param->rc.vbvBufferSize && m_param->rc.vbvMaxBitrate)
+        {
+            /* aggregate lowres row satds to CTU resolution */
+            curFrame->m_lowres.lowresCostForRc = curFrame->m_lowres.lowresCosts[b - p0][p1 - b];
+            uint32_t lowresRow = 0, lowresCol = 0, lowresCuIdx = 0, sum = 0, intraSum = 0;
+            uint32_t scale = m_param->maxCUSize / (2 * X265_LOWRES_CU_SIZE);
+            uint32_t numCuInHeight = (m_param->sourceHeight + m_param->maxCUSize - 1) / m_param->maxCUSize;
+            uint32_t widthInLowresCu = (uint32_t)m_8x8Width, heightInLowresCu = (uint32_t)m_8x8Height;
+            double *qp_offset = 0;
+            /* Factor in qpoffsets based on Aq/Cutree in CU costs */
+            if (m_param->rc.aqMode || m_param->bAQMotion)
+                qp_offset = (frames[b]->sliceType == X265_TYPE_B || !m_param->rc.cuTree) ? frames[b]->qpAqOffset : frames[b]->qpCuTreeOffset;
 
-    X265_CHECK(curFrame->m_lowres.costEst[b - p0][p1 - b] > 0, "Slice cost not estimated\n")
-
-    if (m_param->rc.cuTree && !m_param->rc.bStatRead)
-        /* update row satds based on cutree offsets */
-        curFrame->m_lowres.satdCost = frameCostRecalculate(frames, p0, p1, b);
-    else if (m_param->analysisReuseMode != X265_ANALYSIS_LOAD || m_param->scaleFactor)
-    {
-        if (m_param->rc.aqMode)
-            curFrame->m_lowres.satdCost = curFrame->m_lowres.costEstAq[b - p0][p1 - b];
-        else
-            curFrame->m_lowres.satdCost = curFrame->m_lowres.costEst[b - p0][p1 - b];
-    }
-
-    if (m_param->rc.vbvBufferSize && m_param->rc.vbvMaxBitrate)
-    {
-        /* aggregate lowres row satds to CTU resolution */
-        curFrame->m_lowres.lowresCostForRc = curFrame->m_lowres.lowresCosts[b - p0][p1 - b];
-        uint32_t lowresRow = 0, lowresCol = 0, lowresCuIdx = 0, sum = 0, intraSum = 0;
-        uint32_t scale = m_param->maxCUSize / (2 * X265_LOWRES_CU_SIZE);
-        uint32_t numCuInHeight = (m_param->sourceHeight + m_param->maxCUSize - 1) / m_param->maxCUSize;
-        uint32_t widthInLowresCu = (uint32_t)m_8x8Width, heightInLowresCu = (uint32_t)m_8x8Height;
-        double *qp_offset = 0;
-        /* Factor in qpoffsets based on Aq/Cutree in CU costs */
-        if (m_param->rc.aqMode || m_param->bAQMotion)
-            qp_offset = (frames[b]->sliceType == X265_TYPE_B || !m_param->rc.cuTree) ? frames[b]->qpAqOffset : frames[b]->qpCuTreeOffset;
-
-        for (uint32_t row = 0; row < numCuInHeight; row++)
-        {
-            lowresRow = row * scale;
-            for (uint32_t cnt = 0; cnt < scale && lowresRow < heightInLowresCu; lowresRow++, cnt++)
+            for (uint32_t row = 0; row < numCuInHeight; row++)
             {
-                sum = 0; intraSum = 0;
-                int diff = 0;
-                lowresCuIdx = lowresRow * widthInLowresCu;
-                for (lowresCol = 0; lowresCol < widthInLowresCu; lowresCol++, lowresCuIdx++)
+                lowresRow = row * scale;
+                for (uint32_t cnt = 0; cnt < scale && lowresRow < heightInLowresCu; lowresRow++, cnt++)
                 {
-                    uint16_t lowresCuCost = curFrame->m_lowres.lowresCostForRc[lowresCuIdx] & LOWRES_COST_MASK;
-                    if (qp_offset)
+                    sum = 0; intraSum = 0;
+                    int diff = 0;
+                    lowresCuIdx = lowresRow * widthInLowresCu;
+                    for (lowresCol = 0; lowresCol < widthInLowresCu; lowresCol++, lowresCuIdx++)
                     {
-                        double qpOffset;
-                        if (m_param->rc.qgSize == 8)
-                            qpOffset = (qp_offset[lowresCol * 2 + lowresRow * widthInLowresCu * 4] +
-                                        qp_offset[lowresCol * 2 + lowresRow * widthInLowresCu * 4 + 1] +
-                                        qp_offset[lowresCol * 2 + lowresRow * widthInLowresCu * 4 + curFrame->m_lowres.maxBlocksInRowFullRes] +
-                                        qp_offset[lowresCol * 2 + lowresRow * widthInLowresCu * 4 + curFrame->m_lowres.maxBlocksInRowFullRes + 1]) / 4;
-                        else
-                            qpOffset = qp_offset[lowresCuIdx];
-                        lowresCuCost = (uint16_t)((lowresCuCost * x265_exp2fix8(qpOffset) + 128) >> 8);
-                        int32_t intraCuCost = curFrame->m_lowres.intraCost[lowresCuIdx];
-                        curFrame->m_lowres.intraCost[lowresCuIdx] = (intraCuCost * x265_exp2fix8(qpOffset) + 128) >> 8;
+                        uint16_t lowresCuCost = curFrame->m_lowres.lowresCostForRc[lowresCuIdx] & LOWRES_COST_MASK;
+                        if (qp_offset)
+                        {
+                            double qpOffset;
+                            if (m_param->rc.qgSize == 8)
+                                qpOffset = (qp_offset[lowresCol * 2 + lowresRow * widthInLowresCu * 4] +
+                                qp_offset[lowresCol * 2 + lowresRow * widthInLowresCu * 4 + 1] +
+                                qp_offset[lowresCol * 2 + lowresRow * widthInLowresCu * 4 + curFrame->m_lowres.maxBlocksInRowFullRes] +
+                                qp_offset[lowresCol * 2 + lowresRow * widthInLowresCu * 4 + curFrame->m_lowres.maxBlocksInRowFullRes + 1]) / 4;
+                            else
+                                qpOffset = qp_offset[lowresCuIdx];
+                            lowresCuCost = (uint16_t)((lowresCuCost * x265_exp2fix8(qpOffset) + 128) >> 8);
+                            int32_t intraCuCost = curFrame->m_lowres.intraCost[lowresCuIdx];
+                            curFrame->m_lowres.intraCost[lowresCuIdx] = (intraCuCost * x265_exp2fix8(qpOffset) + 128) >> 8;
+                        }
+                        if (m_param->bIntraRefresh && slice->m_sliceType == X265_TYPE_P)
+                            for (uint32_t x = curFrame->m_encData->m_pir.pirStartCol; x <= curFrame->m_encData->m_pir.pirEndCol; x++)
+                                diff += curFrame->m_lowres.intraCost[lowresCuIdx] - lowresCuCost;
+                        curFrame->m_lowres.lowresCostForRc[lowresCuIdx] = lowresCuCost;
+                        sum += lowresCuCost;
+                        intraSum += curFrame->m_lowres.intraCost[lowresCuIdx];
                     }
-                    if (m_param->bIntraRefresh && slice->m_sliceType == X265_TYPE_P)
-                        for (uint32_t x = curFrame->m_encData->m_pir.pirStartCol; x <= curFrame->m_encData->m_pir.pirEndCol; x++)
-                            diff += curFrame->m_lowres.intraCost[lowresCuIdx] - lowresCuCost;
-                    curFrame->m_lowres.lowresCostForRc[lowresCuIdx] = lowresCuCost;
-                    sum += lowresCuCost;
-                    intraSum += curFrame->m_lowres.intraCost[lowresCuIdx];
+                    curFrame->m_encData->m_rowStat[row].satdForVbv += sum;
+                    curFrame->m_encData->m_rowStat[row].satdForVbv += diff;
+                    curFrame->m_encData->m_rowStat[row].intraSatdForVbv += intraSum;
                 }
-                curFrame->m_encData->m_rowStat[row].satdForVbv += sum;
-                curFrame->m_encData->m_rowStat[row].satdForVbv += diff;
-                curFrame->m_encData->m_rowStat[row].intraSatdForVbv += intraSum;
             }
         }
     }
diff -r 2718cb5dd67f -r 7c66e7547738 source/x265.h
--- a/source/x265.h	Tue Sep 05 11:21:56 2017 +0530
+++ b/source/x265.h	Mon Jul 24 11:19:30 2017 +0530
@@ -88,6 +88,20 @@
     uint8_t* payload;
 } x265_nal;
 
+#define X265_LOOKAHEAD_MAX 250
+
+typedef struct x265_lookahead_data /* Lookahead results embedded in x265_analysis_data so that analysis-load can skip the lookahead */
+{
+    int64_t   plannedSatd[X265_LOOKAHEAD_MAX + 1]; /* X265_LOOKAHEAD_MAX + 1 entries; presumably planned SATD costs of upcoming frames - TODO confirm */
+    uint32_t  *vbvCost; /* numCUsInFrame entries, read separately from the struct; copied into m_cuStat[].vbvCost */
+    uint32_t  *intraVbvCost; /* numCUsInFrame entries, read separately from the struct; copied into m_cuStat[].intraVbvCost */
+    uint32_t  *satdForVbv; /* numCuInHeight entries (one per CU row), read separately from the struct */
+    uint32_t  *intraSatdForVbv; /* numCuInHeight entries (one per CU row), read separately from the struct */
+    int       keyframe; /* normalised with !! into m_lowres.bKeyframe when the frame is loaded */
+    int       lastMiniGopBFrame; /* presumably feeds m_lowres.bLastMiniGopBFrame on load - TODO confirm */
+    int       plannedType[X265_LOOKAHEAD_MAX + 1]; /* X265_LOOKAHEAD_MAX + 1 entries; presumably planned slice types of upcoming frames - TODO confirm */
+} x265_lookahead_data;
+
 /* Stores all analysis data for a single frame */
 typedef struct x265_analysis_data
 {
@@ -102,6 +116,8 @@
     void*            wt;
     void*            interData;
     void*            intraData;
+    uint32_t         numCuInHeight;
+    x265_lookahead_data lookahead;
 } x265_analysis_data;
 
 /* cu statistics */
_______________________________________________
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel

Reply via email to