[x265] [PATCH] remove maxCTU size restriction in scaled save/load encodes

2018-02-27 Thread bhavna
# HG changeset patch
# User Bhavna Hariharan 
# Date 1519796358 -19800
#  Wed Feb 28 11:09:18 2018 +0530
# Node ID 55eb3992299530de882829de0d3c0fea6d58b70d
# Parent  7219376de42a1cc378ec957c886b511139d3c201
remove maxCTU size restriction in scaled save/load encodes

The scaled save/load feature requires that the save encode has a maximum CTU
size of 32. The 32x32 blocks are mapped to a 64x64 block in load encode. Due to
this restriction we will be able to hierarchically encode only 3 resolutions.
WxH - ctu 16
2Wx2H - ctu 32
4Wx4H - ctu 64

diff -r 7219376de42a -r 55eb39922995 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Thu Feb 15 02:21:26 2018 -0800
+++ b/source/encoder/encoder.cpp	Wed Feb 28 11:09:18 2018 +0530
@@ -3272,10 +3272,10 @@
 
 #define X265_FREAD(val, size, readSize, fileOffset, src)\
 if (!m_param->bUseAnalysisFile)\
-{\
+{\
 memcpy(val, src, (size * readSize));\
-}\
-else if (fread(val, size, readSize, fileOffset) != readSize)\
+}\
+else if (fread(val, size, readSize, fileOffset) != readSize)\
 {\
 x265_log(NULL, X265_LOG_ERROR, "Error reading analysis data\n");\
 freeAnalysis(analysis);\
@@ -3334,10 +3334,37 @@
 int scaledNumPartition = analysis->numPartitions;
 int factor = 1 << m_param->scaleFactor;
 
+int numPartitions = analysis->numPartitions;
+int numCUsInFrame = analysis->numCUsInFrame;
+int extendedWidth, extendedHeight;
+cuLocation cuLoc;
+cuLoc.heightInCU = (m_param->sourceHeight + m_param->maxCUSize - 1) >> 
m_param->maxLog2CUSize;
+cuLoc.widthInCU = (m_param->sourceWidth + m_param->maxCUSize - 1) >> 
m_param->maxLog2CUSize;
+cuLoc.skipHeight = false;
+cuLoc.skipWidth = false;
+
 if (m_param->scaleFactor)
-analysis->numPartitions *= factor;
+{
+/* Allocate memory for scaled resoultion's numPartitions and 
numCUsInFrame*/
+analysis->numPartitions = m_param->num4x4Partitions;
+analysis->numCUsInFrame = cuLoc.heightInCU * cuLoc.widthInCU;
+
+/* Set skipWidth/skipHeight flags when the out of bound pixels in 
lowRes is greater than half of maxCUSize */
+extendedWidth = ((m_param->sourceWidth / 2 + m_param->maxCUSize - 1) 
>> m_param->maxLog2CUSize) * m_param->maxCUSize;
+extendedHeight = ((m_param->sourceHeight / 2 + m_param->maxCUSize - 1) 
>> m_param->maxLog2CUSize) * m_param->maxCUSize;
+uint32_t outOfBoundaryLowres = extendedWidth - m_param->sourceWidth / 
2;
+if (outOfBoundaryLowres * 2 >= m_param->maxCUSize)
+cuLoc.skipWidth = true;
+uint32_t outOfBoundaryLowresH = extendedHeight - m_param->sourceHeight 
/ 2;
+if (outOfBoundaryLowresH * 2 >= m_param->maxCUSize)
+cuLoc.skipHeight = true;
+}
+
 /* Memory is allocated for inter and intra analysis data based on the 
slicetype */
 allocAnalysis(analysis);
+
+analysis->numPartitions = numPartitions * factor;
+analysis->numCUsInFrame = numCUsInFrame;
 if (m_param->bDisableLookahead && m_rateControl->m_isVbv)
 {
 X265_FREAD(analysis->lookahead.intraVbvCost, sizeof(uint32_t), 
analysis->numCUsInFrame, m_analysisFileIn, picData->lookahead.intraVbvCost);
@@ -3345,6 +3372,11 @@
 X265_FREAD(analysis->lookahead.satdForVbv, sizeof(uint32_t), 
analysis->numCuInHeight, m_analysisFileIn, picData->lookahead.satdForVbv);
 X265_FREAD(analysis->lookahead.intraSatdForVbv, sizeof(uint32_t), 
analysis->numCuInHeight, m_analysisFileIn, picData->lookahead.intraSatdForVbv);
 }
+
+cuLoc.evenRowIndex = 0;
+cuLoc.oddRowIndex = m_param->num4x4Partitions * cuLoc.widthInCU;
+cuLoc.switchCondition = 0; // To switch between odd and even rows
+
 if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == 
X265_TYPE_I)
 {
 if (m_param->analysisReuseLevel < 2)
@@ -3365,17 +3397,30 @@
 for (uint32_t d = 0; d < depthBytes; d++)
 {
 int bytes = analysis->numPartitions >> (depthBuf[d] * 2);
+int numCTUCopied = 1;
+
 if (m_param->scaleFactor)
 {
-if (depthBuf[d] == 0)
-depthBuf[d] = 1;
+if (!depthBuf[d]) //copy data of one 64x64 to four scaled 
64x64 CTUs.
+{
+bytes /= 4;
+numCTUCopied = 4;
+}
+
 if (partSizes[d] == SIZE_NxN)
 partSizes[d] = SIZE_2Nx2N;
+if ((depthBuf[d] > 1 && m_param->maxCUSize == 64) || 
(depthBuf[d] && m_param->maxCUSize != 64))
+depthBuf[d]--;
 }
-memset(&((analysis_intra_data 
*)analysis->intraData)->depth[count], depthBuf[d], bytes);
-memset(&((analysis_intra_data 
*)analysis->intraData)->chromaModes[count], modeBuf[d], bytes);
-memset(&((analysis_intra_data 
*)analysis->intraData)->partSizes[count], par

[x265] [PATCH] remove maxCTU size restriction in scaled save/load encodes

2018-03-04 Thread bhavna
# HG changeset patch
# User Bhavna Hariharan 
# Date 1519796358 -19800
#  Wed Feb 28 11:09:18 2018 +0530
# Node ID cf543136cbd0dd87e53bbee90358f157a47005ae
# Parent  0b781d592c8e6e0917dc5f152129bebb201e529d
remove maxCTU size restriction in scaled save/load encodes

The scaled save/load feature requires that the save encode has a maximum CTU
size of 32. The 32x32 blocks are mapped to a 64x64 block in load encode. Due to
this restriction we will be able to hierarchically encode only 3 resolutions.
WxH - ctu 16
2Wx2H - ctu 32
4Wx4H - ctu 64

diff -r 0b781d592c8e -r cf543136cbd0 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Mon Mar 05 11:24:22 2018 +0530
+++ b/source/encoder/encoder.cpp	Wed Feb 28 11:09:18 2018 +0530
@@ -3334,10 +3334,34 @@
 int scaledNumPartition = analysis->numPartitions;
 int factor = 1 << m_param->scaleFactor;
 
+int numPartitions = analysis->numPartitions;
+int numCUsInFrame = analysis->numCUsInFrame;
+cuLocation cuLoc;
+cuLoc.heightInCU = (m_param->sourceHeight + m_param->maxCUSize - 1) >> 
m_param->maxLog2CUSize;
+cuLoc.widthInCU = (m_param->sourceWidth + m_param->maxCUSize - 1) >> 
m_param->maxLog2CUSize;
+
 if (m_param->scaleFactor)
-analysis->numPartitions *= factor;
+{
+/* Allocate memory for scaled resoultion's numPartitions and 
numCUsInFrame*/
+analysis->numPartitions = m_param->num4x4Partitions;
+analysis->numCUsInFrame = cuLoc.heightInCU * cuLoc.widthInCU;
+
+/* Set skipWidth/skipHeight flags when the out of bound pixels in 
lowRes is greater than half of maxCUSize */
+int extendedWidth = ((m_param->sourceWidth / 2 + m_param->maxCUSize - 
1) >> m_param->maxLog2CUSize) * m_param->maxCUSize;
+int extendedHeight = ((m_param->sourceHeight / 2 + m_param->maxCUSize 
- 1) >> m_param->maxLog2CUSize) * m_param->maxCUSize;
+uint32_t outOfBoundaryLowres = extendedWidth - m_param->sourceWidth / 
2;
+if (outOfBoundaryLowres * 2 >= m_param->maxCUSize)
+cuLoc.skipWidth = true;
+uint32_t outOfBoundaryLowresH = extendedHeight - m_param->sourceHeight 
/ 2;
+if (outOfBoundaryLowresH * 2 >= m_param->maxCUSize)
+cuLoc.skipHeight = true;
+}
+
 /* Memory is allocated for inter and intra analysis data based on the 
slicetype */
 allocAnalysis(analysis);
+
+analysis->numPartitions = numPartitions * factor;
+analysis->numCUsInFrame = numCUsInFrame;
 if (m_param->bDisableLookahead && m_rateControl->m_isVbv)
 {
 X265_FREAD(analysis->lookahead.intraVbvCost, sizeof(uint32_t), 
analysis->numCUsInFrame, m_analysisFileIn, picData->lookahead.intraVbvCost);
@@ -3345,6 +3369,11 @@
 X265_FREAD(analysis->lookahead.satdForVbv, sizeof(uint32_t), 
analysis->numCuInHeight, m_analysisFileIn, picData->lookahead.satdForVbv);
 X265_FREAD(analysis->lookahead.intraSatdForVbv, sizeof(uint32_t), 
analysis->numCuInHeight, m_analysisFileIn, picData->lookahead.intraSatdForVbv);
 }
+
+cuLoc.evenRowIndex = 0;
+cuLoc.oddRowIndex = m_param->num4x4Partitions * cuLoc.widthInCU;
+cuLoc.switchCondition = 0; // To switch between odd and even rows
+
 if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == 
X265_TYPE_I)
 {
 if (m_param->analysisReuseLevel < 2)
@@ -3365,17 +3394,30 @@
 for (uint32_t d = 0; d < depthBytes; d++)
 {
 int bytes = analysis->numPartitions >> (depthBuf[d] * 2);
+int numCTUCopied = 1;
+
 if (m_param->scaleFactor)
 {
-if (depthBuf[d] == 0)
-depthBuf[d] = 1;
+if (!depthBuf[d]) //copy data of one 64x64 to four scaled 
64x64 CTUs.
+{
+bytes /= 4;
+numCTUCopied = 4;
+}
+
 if (partSizes[d] == SIZE_NxN)
 partSizes[d] = SIZE_2Nx2N;
+if ((depthBuf[d] > 1 && m_param->maxCUSize == 64) || 
(depthBuf[d] && m_param->maxCUSize != 64))
+depthBuf[d]--;
 }
-memset(&((analysis_intra_data 
*)analysis->intraData)->depth[count], depthBuf[d], bytes);
-memset(&((analysis_intra_data 
*)analysis->intraData)->chromaModes[count], modeBuf[d], bytes);
-memset(&((analysis_intra_data 
*)analysis->intraData)->partSizes[count], partSizes[d], bytes);
-count += bytes;
+for (int numCTU = 0; numCTU < numCTUCopied; numCTU++)
+{
+memset(&((analysis_intra_data 
*)analysis->intraData)->depth[count], depthBuf[d], bytes);
+memset(&((analysis_intra_data 
*)analysis->intraData)->chromaModes[count], modeBuf[d], bytes);
+memset(&((analysis_intra_data 
*)analysis->intraData)->partSizes[count], partSizes[d], bytes);
+count += bytes;
+if (m_param->scaleFactor)
+

[x265] [PATCH] remove maxCTU size restriction in scaled save/load encodes

2018-03-05 Thread bhavna
# HG changeset patch
# User Bhavna Hariharan 
# Date 1519796358 -19800
#  Wed Feb 28 11:09:18 2018 +0530
# Node ID ce647bfa20e203ed1aeb8f944326ac15cb74
# Parent  0b781d592c8e6e0917dc5f152129bebb201e529d
remove maxCTU size restriction in scaled save/load encodes

The scaled save/load feature requires that the save encode has a maximum CTU
size of 32. The 32x32 blocks are mapped to a 64x64 block in load encode. Due to
this restriction we will be able to hierarchically encode only 3 resolutions.
WxH - ctu 16
2Wx2H - ctu 32
4Wx4H - ctu 64

diff -r 0b781d592c8e -r ce647bfa20e2 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Mon Mar 05 11:24:22 2018 +0530
+++ b/source/encoder/encoder.cpp	Wed Feb 28 11:09:18 2018 +0530
@@ -3334,10 +3334,33 @@
 int scaledNumPartition = analysis->numPartitions;
 int factor = 1 << m_param->scaleFactor;
 
+int numPartitions = analysis->numPartitions;
+int numCUsInFrame = analysis->numCUsInFrame;
+cuLocation cuLoc;
+cuLoc.init(m_param);
+
 if (m_param->scaleFactor)
-analysis->numPartitions *= factor;
+{
+/* Allocate memory for scaled resoultion's numPartitions and 
numCUsInFrame*/
+analysis->numPartitions = m_param->num4x4Partitions;
+analysis->numCUsInFrame = cuLoc.heightInCU * cuLoc.widthInCU;
+
+/* Set skipWidth/skipHeight flags when the out of bound pixels in 
lowRes is greater than half of maxCUSize */
+int extendedWidth = ((m_param->sourceWidth / 2 + m_param->maxCUSize - 
1) >> m_param->maxLog2CUSize) * m_param->maxCUSize;
+int extendedHeight = ((m_param->sourceHeight / 2 + m_param->maxCUSize 
- 1) >> m_param->maxLog2CUSize) * m_param->maxCUSize;
+uint32_t outOfBoundaryLowres = extendedWidth - m_param->sourceWidth / 
2;
+if (outOfBoundaryLowres * 2 >= m_param->maxCUSize)
+cuLoc.skipWidth = true;
+uint32_t outOfBoundaryLowresH = extendedHeight - m_param->sourceHeight 
/ 2;
+if (outOfBoundaryLowresH * 2 >= m_param->maxCUSize)
+cuLoc.skipHeight = true;
+}
+
 /* Memory is allocated for inter and intra analysis data based on the 
slicetype */
 allocAnalysis(analysis);
+
+analysis->numPartitions = numPartitions * factor;
+analysis->numCUsInFrame = numCUsInFrame;
 if (m_param->bDisableLookahead && m_rateControl->m_isVbv)
 {
 X265_FREAD(analysis->lookahead.intraVbvCost, sizeof(uint32_t), 
analysis->numCUsInFrame, m_analysisFileIn, picData->lookahead.intraVbvCost);
@@ -3345,6 +3368,7 @@
 X265_FREAD(analysis->lookahead.satdForVbv, sizeof(uint32_t), 
analysis->numCuInHeight, m_analysisFileIn, picData->lookahead.satdForVbv);
 X265_FREAD(analysis->lookahead.intraSatdForVbv, sizeof(uint32_t), 
analysis->numCuInHeight, m_analysisFileIn, picData->lookahead.intraSatdForVbv);
 }
+
 if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == 
X265_TYPE_I)
 {
 if (m_param->analysisReuseLevel < 2)
@@ -3361,21 +3385,34 @@
 X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, 
intraPic->chromaModes);
 X265_FREAD(partSizes, sizeof(uint8_t), depthBytes, m_analysisFileIn, 
intraPic->partSizes);
 
-size_t count = 0;
+uint32_t count = 0;
 for (uint32_t d = 0; d < depthBytes; d++)
 {
 int bytes = analysis->numPartitions >> (depthBuf[d] * 2);
+int numCTUCopied = 1;
+
 if (m_param->scaleFactor)
 {
-if (depthBuf[d] == 0)
-depthBuf[d] = 1;
+if (!depthBuf[d]) //copy data of one 64x64 to four scaled 
64x64 CTUs.
+{
+bytes /= 4;
+numCTUCopied = 4;
+}
+
 if (partSizes[d] == SIZE_NxN)
 partSizes[d] = SIZE_2Nx2N;
+if ((depthBuf[d] > 1 && m_param->maxCUSize == 64) || 
(depthBuf[d] && m_param->maxCUSize != 64))
+depthBuf[d]--;
 }
-memset(&((analysis_intra_data 
*)analysis->intraData)->depth[count], depthBuf[d], bytes);
-memset(&((analysis_intra_data 
*)analysis->intraData)->chromaModes[count], modeBuf[d], bytes);
-memset(&((analysis_intra_data 
*)analysis->intraData)->partSizes[count], partSizes[d], bytes);
-count += bytes;
+for (int numCTU = 0; numCTU < numCTUCopied; numCTU++)
+{
+memset(&((analysis_intra_data 
*)analysis->intraData)->depth[count], depthBuf[d], bytes);
+memset(&((analysis_intra_data 
*)analysis->intraData)->chromaModes[count], modeBuf[d], bytes);
+memset(&((analysis_intra_data 
*)analysis->intraData)->partSizes[count], partSizes[d], bytes);
+count += bytes;
+if (m_param->scaleFactor)
+d += getCUIndex(&cuLoc, &count, bytes, 1);
+}
 }
 
 if (!m_

Re: [x265] [PATCH] remove maxCTU size restriction in scaled save/load encodes

2018-03-05 Thread Ashok Kumar Mishra
On Mon, Mar 5, 2018 at 2:19 PM,  wrote:

> # HG changeset patch
> # User Bhavna Hariharan 
> # Date 1519796358 -19800
> #  Wed Feb 28 11:09:18 2018 +0530
> # Node ID ce647bfa20e203ed1aeb8f944326ac15cb74
> # Parent  0b781d592c8e6e0917dc5f152129bebb201e529d
> remove maxCTU size restriction in scaled save/load encodes
>
> The scaled save/load feature requires that the save encode has a maximum
> CTU
> size of 32. The 32x32 blocks are mapped to a 64x64 block in load encode.
> Due to
> this restriction we will be able to hierarchically encode only 3 resolutions.
> WxH - ctu 16
> 2Wx2H - ctu 32
> 4Wx4H - ctu 64
>
> diff -r 0b781d592c8e -r ce647bfa20e2 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cppMon Mar 05 11:24:22 2018 +0530
> +++ b/source/encoder/encoder.cppWed Feb 28 11:09:18 2018 +0530
> @@ -3334,10 +3334,33 @@
>  int scaledNumPartition = analysis->numPartitions;
>  int factor = 1 << m_param->scaleFactor;
>
> +int numPartitions = analysis->numPartitions;
> +int numCUsInFrame = analysis->numCUsInFrame;
> +cuLocation cuLoc;
> +cuLoc.init(m_param);
> +
>  if (m_param->scaleFactor)
> -analysis->numPartitions *= factor;
> +{
> +/* Allocate memory for scaled resoultion's numPartitions and
> numCUsInFrame*/
> +analysis->numPartitions = m_param->num4x4Partitions;
> +analysis->numCUsInFrame = cuLoc.heightInCU * cuLoc.widthInCU;
> +
> +/* Set skipWidth/skipHeight flags when the out of bound pixels in
> lowRes is greater than half of maxCUSize */
> +int extendedWidth = ((m_param->sourceWidth / 2 +
> m_param->maxCUSize - 1) >> m_param->maxLog2CUSize) * m_param->maxCUSize;
> +int extendedHeight = ((m_param->sourceHeight / 2 +
> m_param->maxCUSize - 1) >> m_param->maxLog2CUSize) * m_param->maxCUSize;
> +uint32_t outOfBoundaryLowres = extendedWidth -
> m_param->sourceWidth / 2;
> +if (outOfBoundaryLowres * 2 >= m_param->maxCUSize)
> +cuLoc.skipWidth = true;
> +uint32_t outOfBoundaryLowresH = extendedHeight -
> m_param->sourceHeight / 2;
> +if (outOfBoundaryLowresH * 2 >= m_param->maxCUSize)
> +cuLoc.skipHeight = true;
> +}
> +
>  /* Memory is allocated for inter and intra analysis data based on the
> slicetype */
>  allocAnalysis(analysis);
> +
> +analysis->numPartitions = numPartitions * factor;
> +analysis->numCUsInFrame = numCUsInFrame;
>  if (m_param->bDisableLookahead && m_rateControl->m_isVbv)
>  {
>  X265_FREAD(analysis->lookahead.intraVbvCost, sizeof(uint32_t),
> analysis->numCUsInFrame, m_analysisFileIn, picData->lookahead.
> intraVbvCost);
> @@ -3345,6 +3368,7 @@
>  X265_FREAD(analysis->lookahead.satdForVbv, sizeof(uint32_t),
> analysis->numCuInHeight, m_analysisFileIn, picData->lookahead.satdForVbv);
>  X265_FREAD(analysis->lookahead.intraSatdForVbv,
> sizeof(uint32_t), analysis->numCuInHeight, m_analysisFileIn,
> picData->lookahead.intraSatdForVbv);
>  }
> +
>  if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType ==
> X265_TYPE_I)
>  {
>  if (m_param->analysisReuseLevel < 2)
> @@ -3361,21 +3385,34 @@
>  X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, intraPic->chromaModes);
>  X265_FREAD(partSizes, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, intraPic->partSizes);
>
> -size_t count = 0;
> +uint32_t count = 0;
>  for (uint32_t d = 0; d < depthBytes; d++)
>  {
>  int bytes = analysis->numPartitions >> (depthBuf[d] * 2);
> +int numCTUCopied = 1;
> +
>  if (m_param->scaleFactor)
>  {
> -if (depthBuf[d] == 0)
> -depthBuf[d] = 1;
> +if (!depthBuf[d]) //copy data of one 64x64 to four scaled
> 64x64 CTUs.
> +{
> +bytes /= 4;
> +numCTUCopied = 4;
> +}
> +
>  if (partSizes[d] == SIZE_NxN)
>  partSizes[d] = SIZE_2Nx2N;
> +if ((depthBuf[d] > 1 && m_param->maxCUSize == 64) ||
> (depthBuf[d] && m_param->maxCUSize != 64))
> +depthBuf[d]--;
>  }
> -memset(&((analysis_intra_data 
> *)analysis->intraData)->depth[count],
> depthBuf[d], bytes);
> -memset(&((analysis_intra_data 
> *)analysis->intraData)->chromaModes[count],
> modeBuf[d], bytes);
> -memset(&((analysis_intra_data 
> *)analysis->intraData)->partSizes[count],
> partSizes[d], bytes);
> -count += bytes;
> +for (int numCTU = 0; numCTU < numCTUCopied; numCTU++)
> +{
> +memset(&((analysis_intra_data
> *)analysis->intraData)->depth[count], depthBuf[d], bytes);
> +memset(&((analysis_intra_data 
> *)analysis->intraData)->chromaModes[count],
> modeBuf[d], bytes);
> +memset(&((a