1. Clarified: At this point of encode only pictures are available else we would have used the frames directly. The edge computation is a common function used for AQ4, histscenecut and EQT and in case of EQT and AQ4 (for which it was originally written) the functionality is available only on internal bit depth. When source and build are of same bitdepth this conversion does not happen in other cases this conversion happens but as the conversion code is in assembly its running faster than the normal flow. So there is no performance impact on this. Example for 10bit build the following are the encode numbers for 10bit /8 bit inputs of the same video (1080p resloution) for same number of frames. 10bit video - 136.72 secs, 8bit video - 81.20 secs (though there is conversion happening here). Simliar will be the case for 4K also. 2. optimized the else block too.
On Wed, Feb 12, 2020 at 7:06 PM Aruna Matheswaran < ar...@multicorewareinc.com> wrote: > > > On Wed, Feb 12, 2020 at 4:01 PM <srikanth.kurap...@multicorewareinc.com> > wrote: > >> # HG changeset patch >> # User Srikanth Kurapati >> # Date 1580113966 -19800 >> # Mon Jan 27 14:02:46 2020 +0530 >> # Node ID 5b4240c81ce34d2376bca0dad3f5aa6522db9008 >> # Parent 30d303b38c7bd1733aedd01a3f738fb08ec1488c >> Fix: Segmentation fault for hist-scenecut option in 16bpp builds. >> >> This patch fixes segmentation fault and incorrect edge computation due to >> bit >> depth mismatch between source input and x265 builds encountered using >> hist-scene >> cut option. >> >> diff -r 30d303b38c7b -r 5b4240c81ce3 source/common/common.h >> --- a/source/common/common.h Wed Jan 29 12:19:07 2020 +0530 >> +++ b/source/common/common.h Mon Jan 27 14:02:46 2020 +0530 >> @@ -131,7 +131,6 @@ >> typedef int64_t ssum2_t; >> #define SHIFT_TO_BITPLANE 9 >> #define HISTOGRAM_BINS 1024 >> -#define SHIFT 1 >> #else >> typedef uint8_t pixel; >> typedef uint16_t sum_t; >> @@ -140,7 +139,6 @@ >> typedef int32_t ssum2_t; // Signed sum >> #define SHIFT_TO_BITPLANE 7 >> #define HISTOGRAM_BINS 256 >> -#define SHIFT 0 >> #endif // if HIGH_BIT_DEPTH >> >> #if X265_DEPTH < 10 >> diff -r 30d303b38c7b -r 5b4240c81ce3 source/encoder/encoder.cpp >> --- a/source/encoder/encoder.cpp Wed Jan 29 12:19:07 2020 +0530 >> +++ b/source/encoder/encoder.cpp Mon Jan 27 14:02:46 2020 +0530 >> @@ -220,9 +220,9 @@ >> { >> for (int i = 0; i < x265_cli_csps[m_param->internalCsp].planes; >> i++) >> { >> - m_planeSizes[i] = m_param->sourceWidth * >> m_param->sourceHeight >> x265_cli_csps[m_param->internalCsp].height[i]; >> - } >> - uint32_t pixelbytes = m_param->sourceBitDepth > 8 ? 2 : 1; >> + m_planeSizes[i] = (m_param->sourceWidth >> >> x265_cli_csps[p->internalCsp].width[i]) * (m_param->sourceHeight >> >> x265_cli_csps[m_param->internalCsp].height[i]); >> + } >> + uint32_t pixelbytes = m_param->internalBitDepth > 8 ? 2 : 1; >> m_edgePic = X265_MALLOC(pixel, m_planeSizes[0] * pixelbytes); >> m_edgeHistThreshold = m_param->edgeTransitionThreshold; >> m_chromaHistThreshold = m_edgeHistThreshold * 10.0; >> @@ -231,6 +231,23 @@ >> m_scaledEdgeThreshold = x265_min(m_scaledEdgeThreshold, >> MAX_SCENECUT_THRESHOLD); >> m_scaledChromaThreshold = m_chromaHistThreshold * >> SCENECUT_STRENGTH_FACTOR; >> m_scaledChromaThreshold = x265_min(m_scaledChromaThreshold, >> MAX_SCENECUT_THRESHOLD); >> + if (m_param->sourceBitDepth != m_param->internalBitDepth) >> + { >> + int size = m_param->sourceWidth * m_param->sourceHeight; >> + int hshift = CHROMA_H_SHIFT(m_param->internalCsp); >> + int vshift = CHROMA_V_SHIFT(m_param->internalCsp); >> + int widthC = m_param->sourceWidth >> hshift; >> + int heightC = m_param->sourceHeight >> vshift; >> + >> + m_inputPic[0] = X265_MALLOC(pixel, size); >> + if (m_param->internalCsp != X265_CSP_I400) >> + { >> + for (int j = 1; j < 3; j++) >> + { >> + m_inputPic[j] = X265_MALLOC(pixel, widthC * heightC); >> + } >> + } >> + } >> } >> >> // Do not allow WPP if only one row or fewer than 3 columns, it is >> pointless and unstable >> @@ -874,6 +891,18 @@ >> { >> X265_FREE_ZERO(m_edgePic); >> } >> + >> + if (m_param->sourceBitDepth != m_param->internalBitDepth) >> + { >> + X265_FREE_ZERO(m_inputPic[0]); >> + if (m_param->internalCsp != X265_CSP_I400) >> + { >> + for (int i = 1; i < 3; i++) >> + { >> + X265_FREE_ZERO(m_inputPic[i]); >> + } >> + } >> + } >> } >> >> for (int i = 0; i < m_param->frameNumThreads; i++) >> @@ -1337,11 +1366,86 @@ >> >> bool Encoder::computeHistograms(x265_picture *pic) >> { >> - pixel *src = (pixel *) pic->planes[0]; >> + pixel *src = NULL, *planeV = NULL, *planeU = NULL; >> + uint32_t widthC, heightC; >> + int hshift, vshift; >> + >> + hshift = CHROMA_H_SHIFT(pic->colorSpace); >> + vshift = CHROMA_V_SHIFT(pic->colorSpace); >> + widthC = pic->width >> hshift; >> + heightC = pic->height >> vshift; >> + >> + if (pic->bitDepth == X265_DEPTH) >> + { >> + src = (pixel*)pic->planes[0]; >> + if (m_param->internalCsp != X265_CSP_I400) >> + { >> + planeU = (pixel*)pic->planes[1]; >> + planeV = (pixel*)pic->planes[2]; >> + } >> + } >> + else if (pic->bitDepth == 8 && X265_DEPTH > 8) >> + { >> + int shift = (X265_DEPTH - 8); >> + uint8_t *yChar, *uChar, *vChar; >> + >> + yChar = (uint8_t*)pic->planes[0]; >> + primitives.planecopy_cp(yChar, pic->stride[0] / sizeof(*yChar), >> m_inputPic[0], pic->stride[0] / sizeof(*yChar), pic->width, pic->height, >> shift); >> + src = m_inputPic[0]; >> + if (m_param->internalCsp != X265_CSP_I400) >> + { >> + uChar = (uint8_t*)pic->planes[1]; >> + vChar = (uint8_t*)pic->planes[2]; >> + primitives.planecopy_cp(uChar, pic->stride[1] / >> sizeof(*uChar), m_inputPic[1], pic->stride[1] / sizeof(*uChar), widthC, >> heightC, shift); >> + primitives.planecopy_cp(vChar, pic->stride[2] / >> sizeof(*vChar), m_inputPic[2], pic->stride[2] / sizeof(*vChar), widthC, >> heightC, shift); >> + planeU = m_inputPic[1]; >> + planeV = m_inputPic[2]; >> + } >> + } >> + else >> + { >> + uint16_t *yShort, *uShort, *vShort; >> + /* mask off bits that are supposed to be zero */ >> + uint16_t mask = (1 << X265_DEPTH) - 1; >> + int shift = abs(pic->bitDepth - X265_DEPTH); >> > This block still needs to be optimized. > >> + >> + yShort = (uint16_t*)pic->planes[0]; >> + if (pic->bitDepth > X265_DEPTH) >> + { >> + /* shift right and mask pixels to final size */ >> + primitives.planecopy_sp(yShort, pic->stride[0] / >> sizeof(*yShort), m_inputPic[0], pic->stride[0] / sizeof(*yShort), >> pic->width, pic->height, shift, mask); >> + } >> + else /* Case for (pic.bitDepth <= X265_DEPTH) */ >> + { >> + /* shift left and mask pixels to final size */ >> + primitives.planecopy_sp_shl(yShort, pic->stride[0] / >> sizeof(*yShort), m_inputPic[0], pic->stride[0] / sizeof(*yShort), >> pic->width, pic->height, shift, mask); >> + } >> + src = m_inputPic[0]; >> + >> + if (m_param->internalCsp != X265_CSP_I400) >> + { >> + uShort = (uint16_t*)pic->planes[1]; >> + vShort = (uint16_t*)pic->planes[2]; >> + >> + if (pic->bitDepth > X265_DEPTH) >> + { >> + primitives.planecopy_sp(uShort, pic->stride[1] / >> sizeof(*uShort), m_inputPic[1], pic->stride[1] / sizeof(*uShort), widthC, >> heightC, shift, mask); >> + primitives.planecopy_sp(vShort, pic->stride[2] / >> sizeof(*vShort), m_inputPic[2], pic->stride[2] / sizeof(*vShort), widthC, >> heightC, shift, mask); >> + } >> + else /* Case for (pic.bitDepth <= X265_DEPTH) */ >> + { >> + primitives.planecopy_sp_shl(uShort, pic->stride[1] / >> sizeof(*uShort), m_inputPic[1], pic->stride[1] / sizeof(*uShort), widthC, >> heightC, shift, mask); >> + primitives.planecopy_sp_shl(vShort, pic->stride[2] / >> sizeof(*vShort), m_inputPic[2], pic->stride[2] / sizeof(*vShort), widthC, >> heightC, shift, mask); >> + } >> + >> + planeU = m_inputPic[1]; >> + planeV = m_inputPic[2]; >> + } >> + } >> + >> size_t bufSize = sizeof(pixel) * m_planeSizes[0]; >> int32_t planeCount = x265_cli_csps[m_param->internalCsp].planes; >> - int32_t numBytes = m_param->sourceBitDepth > 8 ? 2 : 1; >> - memset(m_edgePic, 0, bufSize * numBytes); >> + memset(m_edgePic, 0, bufSize); >> >> if (!computeEdge(m_edgePic, src, NULL, pic->width, pic->height, >> pic->width, false, 1)) >> { >> @@ -1350,10 +1454,9 @@ >> } >> >> pixel pixelVal; >> - int64_t size = pic->height * (pic->stride[0] >> SHIFT); >> int32_t *edgeHist = m_curEdgeHist; >> memset(edgeHist, 0, 2 * sizeof(int32_t)); >> - for (int64_t i = 0; i < size; i++) >> + for (int64_t i = 0; i < m_planeSizes[0]; i++) >> { >> if (!m_edgePic[i]) >> edgeHist[0]++; >> @@ -1364,16 +1467,12 @@ >> if (pic->colorSpace != X265_CSP_I400) >> { >> /* U Histogram Calculation */ >> - int32_t HeightL = (pic->height >> >> x265_cli_csps[pic->colorSpace].height[1]); >> - size = HeightL * (pic->stride[1] >> SHIFT); >> int32_t *uHist = m_curUVHist[0]; >> - pixel *chromaPlane = (pixel *) pic->planes[1]; >> - >> memset(uHist, 0, HISTOGRAM_BINS * sizeof(int32_t)); >> >> - for (int64_t i = 0; i < size; i++) >> - { >> - pixelVal = chromaPlane[i]; >> + for (int64_t i = 0; i < m_planeSizes[1]; i++) >> + { >> + pixelVal = planeU[i]; >> uHist[pixelVal]++; >> } >> >> @@ -1381,15 +1480,12 @@ >> if (planeCount == 3) >> { >> pixelVal = 0; >> - int32_t heightV = (pic->height >> >> x265_cli_csps[pic->colorSpace].height[2]); >> - size = heightV * (pic->stride[2] >> SHIFT); >> int32_t *vHist = m_curUVHist[1]; >> - chromaPlane = (pixel *) pic->planes[2]; >> - >> memset(vHist, 0, HISTOGRAM_BINS * sizeof(int32_t)); >> - for (int64_t i = 0; i < size; i++) >> + >> + for (int64_t i = 0; i < m_planeSizes[2]; i++) >> { >> - pixelVal = chromaPlane[i]; >> + pixelVal = planeV[i]; >> vHist[pixelVal]++; >> } >> for (int i = 0; i < HISTOGRAM_BINS; i++) >> diff -r 30d303b38c7b -r 5b4240c81ce3 source/encoder/encoder.h >> --- a/source/encoder/encoder.h Wed Jan 29 12:19:07 2020 +0530 >> +++ b/source/encoder/encoder.h Mon Jan 27 14:02:46 2020 +0530 >> @@ -255,6 +255,7 @@ >> >> /* For histogram based scene-cut detection */ >> pixel* m_edgePic; >> + pixel* m_inputPic[3]; >> int32_t m_curUVHist[2][HISTOGRAM_BINS]; >> int32_t m_curMaxUVHist[HISTOGRAM_BINS]; >> int32_t m_prevMaxUVHist[HISTOGRAM_BINS]; >> _______________________________________________ >> x265-devel mailing list >> x265-devel@videolan.org >> https://mailman.videolan.org/listinfo/x265-devel >> > > > -- > Regards, > *Aruna Matheswaran,* > Video Codec Engineer, > Media & AI analytics BU, > > > > _______________________________________________ > x265-devel mailing list > x265-devel@videolan.org > https://mailman.videolan.org/listinfo/x265-devel > -- *With Regards,* *Srikanth Kurapati.*
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel