>From d516d0564888e154d88d89320302725d87bfab78 Mon Sep 17 00:00:00 2001 From: Srikanth Kurapati <srikanth.kurap...@multicorewareinc.com> Date: Wed, 30 Dec 2020 17:00:08 +0530 Subject: [PATCH] fix: corrects output mismatch for cutree enabled analysis save/load encodes with reuse-levels between 1 and 10 for similar encoder settings.
--- source/abrEncApp.cpp | 14 +++- source/common/common.h | 3 +- source/common/cudata.h | 2 +- source/encoder/analysis.cpp | 31 ++++++++- source/encoder/analysis.h | 1 + source/encoder/api.cpp | 28 +++++++- source/encoder/encoder.cpp | 123 ++++++++++++++++++++++++++--------- source/encoder/slicetype.cpp | 2 +- source/x265.h | 4 +- 9 files changed, 166 insertions(+), 42 deletions(-) diff --git a/source/abrEncApp.cpp b/source/abrEncApp.cpp index fa62ebf63..ea255e3f6 100644 --- a/source/abrEncApp.cpp +++ b/source/abrEncApp.cpp @@ -340,7 +340,12 @@ namespace X265_NS { memcpy(intraDst->partSizes, intraSrc->partSizes, sizeof(char) * src->depthBytes); memcpy(intraDst->chromaModes, intraSrc->chromaModes, sizeof(uint8_t) * src->depthBytes); if (m_param->rc.cuTree) - memcpy(intraDst->cuQPOff, intraSrc->cuQPOff, sizeof(int8_t) * src->depthBytes); + { + if (m_param->analysisSaveReuseLevel == 10) + memcpy(intraDst->cuQPOff, intraSrc->cuQPOff, sizeof(int8_t) * src->depthBytes); + else + memcpy(intraDst->cuQPOff, intraSrc->cuQPOff, sizeof(int8_t) * (src->numCUsInFrame * MAX_NUM_CU_GEOMS)); + } } else { @@ -355,7 +360,12 @@ namespace X265_NS { memcpy(interDst->depth, interSrc->depth, sizeof(uint8_t) * src->depthBytes); memcpy(interDst->modes, interSrc->modes, sizeof(uint8_t) * src->depthBytes); if (m_param->rc.cuTree) - memcpy(interDst->cuQPOff, interSrc->cuQPOff, sizeof(int8_t) * src->depthBytes); + { + if (m_param->analysisReuseLevel == 10) + memcpy(interDst->cuQPOff, interSrc->cuQPOff, sizeof(int8_t) * src->depthBytes); + else + memcpy(interDst->cuQPOff, interSrc->cuQPOff, sizeof(int8_t) * (src->numCUsInFrame * MAX_NUM_CU_GEOMS)); + } if (m_param->analysisSaveReuseLevel > 4) { memcpy(interDst->partSize, interSrc->partSize, sizeof(uint8_t) * src->depthBytes); diff --git a/source/common/common.h b/source/common/common.h index 8c06cd79e..0ffbf17eb 100644 --- a/source/common/common.h +++ b/source/common/common.h @@ -326,7 +326,8 @@ typedef int16_t coeff_t; // transform coefficient 
#define CHROMA_H_SHIFT(x) (x == X265_CSP_I420 || x == X265_CSP_I422) #define CHROMA_V_SHIFT(x) (x == X265_CSP_I420) -#define X265_MAX_PRED_MODE_PER_CTU 85 * 2 * 8 +#define MAX_NUM_CU_GEOMS 85 +#define X265_MAX_PRED_MODE_PER_CTU MAX_NUM_CU_GEOMS * 2 * 8 #define MAX_NUM_TR_COEFFS MAX_TR_SIZE * MAX_TR_SIZE // Maximum number of transform coefficients, for a 32x32 transform #define MAX_NUM_TR_CATEGORIES 16 // 32, 16, 8, 4 transform categories each for luma and chroma diff --git a/source/common/cudata.h b/source/common/cudata.h index 8397f0568..c7d9a1972 100644 --- a/source/common/cudata.h +++ b/source/common/cudata.h @@ -371,7 +371,7 @@ struct CUDataMemPool CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL) * numInstances); } else - { + { uint32_t sizeC = sizeL >> (CHROMA_H_SHIFT(csp) + CHROMA_V_SHIFT(csp)); CHECKED_MALLOC(trCoeffMemBlock, coeff_t, (sizeL + sizeC * 2) * numInstances); } diff --git a/source/encoder/analysis.cpp b/source/encoder/analysis.cpp index aabf386ca..22a4ba74f 100644 --- a/source/encoder/analysis.cpp +++ b/source/encoder/analysis.cpp @@ -220,6 +220,9 @@ Mode& Analysis::compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, con if (m_param->analysisSave && !m_param->analysisLoad) for (int i = 0; i < X265_MAX_PRED_MODE_PER_CTU * numPredDir; i++) m_reuseRef[i] = -1; + + if (m_param->rc.cuTree) + m_reuseQP = &m_reuseInterDataCTU->cuQPOff[ctu.m_cuAddr * MAX_NUM_CU_GEOMS]; } ProfileCUScope(ctu, totalCTUTime, totalCTUs); @@ -233,6 +236,8 @@ Mode& Analysis::compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, con memcpy(ctu.m_partSize, &intraDataCTU->partSizes[ctu.m_cuAddr * numPartition], sizeof(char) * numPartition); memcpy(ctu.m_chromaIntraDir, &intraDataCTU->chromaModes[ctu.m_cuAddr * numPartition], sizeof(uint8_t) * numPartition); } + if (m_param->rc.cuTree && reuseLevel > 1 && reuseLevel < 10) + m_reuseQP = &intraDataCTU->cuQPOff[ctu.m_cuAddr * MAX_NUM_CU_GEOMS]; compressIntraCU(ctu, cuGeom, qp); } else @@ -520,6 +525,9 @@ uint64_t 
Analysis::compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom bool mightSplit = !(cuGeom.flags & CUGeom::LEAF); bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY); + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 && m_param->analysisSaveReuseLevel < 10) + m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp; + bool bAlreadyDecided = m_param->intraRefine != 4 && parentCTU.m_lumaIntraDir[cuGeom.absPartIdx] != (uint8_t)ALL_IDX && !(m_param->bAnalysisType == HEVC_INFO); bool bDecidedDepth = m_param->intraRefine != 4 && parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth; int split = 0; @@ -870,6 +878,9 @@ uint32_t Analysis::compressInterCU_dist(const CUData& parentCTU, const CUGeom& c uint32_t minDepth = m_param->rdLevel <= 4 ? topSkipMinDepth(parentCTU, cuGeom) : 0; uint32_t splitRefs[4] = { 0, 0, 0, 0 }; + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 && m_param->analysisSaveReuseLevel < 10) + m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp; + X265_CHECK(m_param->rdLevel >= 2, "compressInterCU_dist does not support RD 0 or 1\n"); PMODE pmode(*this, cuGeom); @@ -1152,6 +1163,8 @@ SplitData Analysis::compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& uint32_t cuAddr = parentCTU.m_cuAddr; ModeDepth& md = m_modeDepth[depth]; + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 && m_param->analysisSaveReuseLevel < 10) + m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp; if (m_param->searchMethod == X265_SEA) { @@ -1856,6 +1869,9 @@ SplitData Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& ModeDepth& md = m_modeDepth[depth]; md.bestMode = NULL; + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel > 1 && m_param->analysisSaveReuseLevel < 10) + m_reuseQP[cuGeom.geomRecurId] = (int8_t)qp; + if (m_param->searchMethod == X265_SEA) { int numPredDir = m_slice->isInterP() ? 
1 : 2; @@ -3647,11 +3663,20 @@ int Analysis::calculateQpforCuSize(const CUData& ctu, const CUGeom& cuGeom, int3 if (m_param->analysisLoadReuseLevel >= 2 && m_param->rc.cuTree) { - int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) + cuGeom.absPartIdx; + int cuIdx; + int8_t cuQPOffSet = 0; + + if (m_param->scaleFactor == 2 || m_param->analysisLoadReuseLevel == 10) + cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) + cuGeom.absPartIdx; + else + cuIdx = (ctu.m_cuAddr * MAX_NUM_CU_GEOMS) + cuGeom.geomRecurId; + if (ctu.m_slice->m_sliceType == I_SLICE) - return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, (int32_t)(qp + 0.5 + ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx])); + cuQPOffSet = ((x265_analysis_intra_data*)m_frame->m_analysisData.intraData)->cuQPOff[cuIdx]; else - return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, (int32_t)(qp + 0.5 + ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx])); + cuQPOffSet = ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx]; + + return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, (int32_t)(qp + 0.5 + cuQPOffSet)); } if (m_param->rc.hevcAq) { diff --git a/source/encoder/analysis.h b/source/encoder/analysis.h index 3bcb56bc3..8d76d5c5e 100644 --- a/source/encoder/analysis.h +++ b/source/encoder/analysis.h @@ -126,6 +126,7 @@ protected: int32_t* m_reuseRef; uint8_t* m_reuseDepth; uint8_t* m_reuseModes; + int8_t * m_reuseQP; // array of QP values for analysis reuse at reuse levels > 1 and < 10 when cutree is enabled uint8_t* m_reusePartSize; uint8_t* m_reuseMergeFlag; x265_analysis_MV* m_reuseMv[2]; diff --git a/source/encoder/api.cpp b/source/encoder/api.cpp index a986355e0..2c90fe8f2 100644 --- a/source/encoder/api.cpp +++ b/source/encoder/api.cpp @@ -825,7 +825,16 @@ void x265_alloc_analysis_data(x265_param *param, x265_analysis_data* analysis) CHECKED_MALLOC_ZERO(intraData->partSizes, char, analysis->numPartitions * 
analysis->numCUsInFrame); CHECKED_MALLOC_ZERO(intraData->chromaModes, uint8_t, analysis->numPartitions * analysis->numCUsInFrame); if (param->rc.cuTree) - CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t, analysis->numPartitions * analysis->numCUsInFrame); + { + if (maxReuseLevel == 10) + { + CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t, analysis->numPartitions * analysis->numCUsInFrame); + } + else + { + CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t, MAX_NUM_CU_GEOMS * analysis->numCUsInFrame); + } + } } analysis->intraData = intraData; @@ -837,7 +846,16 @@ void x265_alloc_analysis_data(x265_param *param, x265_analysis_data* analysis) CHECKED_MALLOC_ZERO(interData->modes, uint8_t, analysis->numPartitions * analysis->numCUsInFrame); if (param->rc.cuTree && !isMultiPassOpt) - CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t, analysis->numPartitions * analysis->numCUsInFrame); + { + if (maxReuseLevel == 10) + { + CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t, analysis->numPartitions * analysis->numCUsInFrame); + } + else + { + CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t, MAX_NUM_CU_GEOMS * analysis->numCUsInFrame); + } + } CHECKED_MALLOC_ZERO(interData->mvpIdx[0], uint8_t, analysis->numPartitions * analysis->numCUsInFrame); CHECKED_MALLOC_ZERO(interData->mvpIdx[1], uint8_t, analysis->numPartitions * analysis->numCUsInFrame); CHECKED_MALLOC_ZERO(interData->mv[0], x265_analysis_MV, analysis->numPartitions * analysis->numCUsInFrame); @@ -919,7 +937,9 @@ void x265_free_analysis_data(x265_param *param, x265_analysis_data* analysis) X265_FREE((analysis->intraData)->partSizes); X265_FREE((analysis->intraData)->chromaModes); if (param->rc.cuTree) - X265_FREE((analysis->intraData)->cuQPOff); + { + X265_FREE_ZERO((analysis->intraData)->cuQPOff); + } } X265_FREE(analysis->intraData); analysis->intraData = NULL; @@ -931,7 +951,9 @@ void x265_free_analysis_data(x265_param *param, x265_analysis_data* analysis) X265_FREE((analysis->interData)->depth); 
X265_FREE((analysis->interData)->modes); if (!isMultiPassOpt && param->rc.cuTree) + { X265_FREE((analysis->interData)->cuQPOff); + } X265_FREE((analysis->interData)->mvpIdx[0]); X265_FREE((analysis->interData)->mvpIdx[1]); X265_FREE((analysis->interData)->mv[0]); diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp index 1f710e1ce..5eb123d31 100644 --- a/source/encoder/encoder.cpp +++ b/source/encoder/encoder.cpp @@ -4444,6 +4444,26 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x } } } + + int8_t *cuQPBuf = NULL, *cuQPOffSets = NULL; + uint32_t reuseBufSize = 0; + + if (m_param->rc.cuTree) + { + if (m_param->analysisLoadReuseLevel == 10) + reuseBufSize = depthBytes; + else if (m_param->analysisLoadReuseLevel > 1) + reuseBufSize = MAX_NUM_CU_GEOMS * analysis->numCUsInFrame; + cuQPBuf = X265_MALLOC(int8_t, reuseBufSize); + if (!m_param->bUseAnalysisFile) + { + if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I) + cuQPOffSets = intraPic->cuQPOff; + else + cuQPOffSets = interPic->cuQPOff; + } + } + if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I) { if (m_param->bAnalysisType == HEVC_INFO) @@ -4452,19 +4472,21 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x return; uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL, *partSizes = NULL; - int8_t *cuQPBuf = NULL; tempBuf = X265_MALLOC(uint8_t, depthBytes * 3); depthBuf = tempBuf; modeBuf = tempBuf + depthBytes; partSizes = tempBuf + 2 * depthBytes; - if (m_param->rc.cuTree) - cuQPBuf = X265_MALLOC(int8_t, depthBytes); X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, intraPic->depth); X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, intraPic->chromaModes); X265_FREAD(partSizes, sizeof(uint8_t), depthBytes, m_analysisFileIn, intraPic->partSizes); - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes, 
m_analysisFileIn, intraPic->cuQPOff); } + if (m_param->rc.cuTree) + { + X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize, m_analysisFileIn, cuQPOffSets); + if (m_param->analysisLoadReuseLevel > 1 && m_param->analysisLoadReuseLevel < 10) + memcpy(analysis->intraData->cuQPOff, cuQPBuf, sizeof(int8_t) * reuseBufSize); + } size_t count = 0; for (uint32_t d = 0; d < depthBytes; d++) @@ -4480,7 +4502,7 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x memset(&(analysis->intraData)->depth[count], depthBuf[d], bytes); memset(&(analysis->intraData)->chromaModes[count], modeBuf[d], bytes); memset(&(analysis->intraData)->partSizes[count], partSizes[d], bytes); - if (m_param->rc.cuTree) + if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel == 10) memset(&(analysis->intraData)->cuQPOff[count], cuQPBuf[d], bytes); count += bytes; } @@ -4515,7 +4537,6 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2]; MV* mv[2]; int8_t* refIdx[2]; - int8_t* cuQPBuf = NULL; int numBuf = m_param->analysisLoadReuseLevel > 4 ? 
4 : 2; bool bIntraInInter = false; @@ -4535,12 +4556,15 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x tempBuf = X265_MALLOC(uint8_t, depthBytes * numBuf); depthBuf = tempBuf; modeBuf = tempBuf + depthBytes; - if (m_param->rc.cuTree) - cuQPBuf = X265_MALLOC(int8_t, depthBytes); X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, interPic->depth); X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, interPic->modes); - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes, m_analysisFileIn, interPic->cuQPOff); } + if (m_param->rc.cuTree) + { + X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize, m_analysisFileIn, cuQPOffSets); + if (m_param->analysisLoadReuseLevel > 1 && m_param->analysisLoadReuseLevel < 10) + memcpy(analysis->interData->cuQPOff, cuQPBuf, sizeof(int8_t) * reuseBufSize); + } if (m_param->analysisLoadReuseLevel > 4) { @@ -4578,7 +4602,7 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x depthBuf[d] = 1; memset(&(analysis->interData)->depth[count], depthBuf[d], bytes); memset(&(analysis->interData)->modes[count], modeBuf[d], bytes); - if (m_param->rc.cuTree) + if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel == 10) memset(&(analysis->interData)->cuQPOff[count], cuQPBuf[d], bytes); if (m_param->analysisLoadReuseLevel > 4) { @@ -4736,7 +4760,7 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x int numPartitions = analysis->numPartitions; int numCUsInFrame = analysis->numCUsInFrame; int numCuInHeight = analysis->numCuInHeight; - /* Allocate memory for scaled resoultion's numPartitions and numCUsInFrame*/ + /* Allocate memory for scaled resolution's numPartitions and numCUsInFrame */ analysis->numPartitions = m_param->num4x4Partitions; analysis->numCUsInFrame = cuLoc.heightInCU * cuLoc.widthInCU; analysis->numCuInHeight = cuLoc.heightInCU; @@ -4808,25 +4832,40 @@ void 
Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x X265_FREE(vbvCostBuf); } + uint32_t reuseBufSize = 0; + int8_t *cuQPOffSets = NULL, *cuQPBuf = NULL; + if (m_param->rc.cuTree) + { + if (m_param->analysisLoadReuseLevel == 10) + reuseBufSize = depthBytes; + else if (m_param->analysisLoadReuseLevel > 1) + reuseBufSize = (MAX_NUM_CU_GEOMS / factor) * (analysis->numCUsInFrame); + cuQPBuf = X265_MALLOC(int8_t, reuseBufSize); + if (!m_param->bUseAnalysisFile) + { + if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I) + cuQPOffSets = intraPic->cuQPOff; + else + cuQPOffSets = interPic->cuQPOff; + } + } + if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I) { if (m_param->analysisLoadReuseLevel < 2) return; uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL, *partSizes = NULL; - int8_t *cuQPBuf = NULL; tempBuf = X265_MALLOC(uint8_t, depthBytes * 3); depthBuf = tempBuf; modeBuf = tempBuf + depthBytes; partSizes = tempBuf + 2 * depthBytes; - if (m_param->rc.cuTree) - cuQPBuf = X265_MALLOC(int8_t, depthBytes); X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, intraPic->depth); X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, intraPic->chromaModes); X265_FREAD(partSizes, sizeof(uint8_t), depthBytes, m_analysisFileIn, intraPic->partSizes); - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes, m_analysisFileIn, intraPic->cuQPOff); } + if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize, m_analysisFileIn, cuQPOffSets); } uint32_t count = 0; for (uint32_t d = 0; d < depthBytes; d++) @@ -4848,7 +4887,7 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x memset(&(analysis->intraData)->depth[count], depthBuf[d], bytes); memset(&(analysis->intraData)->chromaModes[count], modeBuf[d], bytes); memset(&(analysis->intraData)->partSizes[count], partSizes[d], bytes); - if (m_param->rc.cuTree) + 
if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel == 10) memset(&(analysis->intraData)->cuQPOff[count], cuQPBuf[d], bytes); count += bytes; d += getCUIndex(&cuLoc, &count, bytes, 1); @@ -4886,7 +4925,6 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2]; MV* mv[2]; int8_t* refIdx[2]; - int8_t* cuQPBuf = NULL; int numBuf = m_param->analysisLoadReuseLevel > 4 ? 4 : 2; bool bIntraInInter = false; @@ -4900,12 +4938,16 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x tempBuf = X265_MALLOC(uint8_t, depthBytes * numBuf); depthBuf = tempBuf; modeBuf = tempBuf + depthBytes; - if (m_param->rc.cuTree) - cuQPBuf = X265_MALLOC(int8_t, depthBytes); X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, interPic->depth); X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, interPic->modes); - if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t), depthBytes, m_analysisFileIn, interPic->cuQPOff); } + if (m_param->rc.cuTree) + { + X265_FREAD(cuQPBuf, sizeof(int8_t), reuseBufSize, m_analysisFileIn, cuQPOffSets); + if (m_param->analysisLoadReuseLevel > 1 && m_param->analysisLoadReuseLevel < 10) + memcpy(&(analysis->interData)->cuQPOff, cuQPBuf, sizeof(int8_t) * reuseBufSize); + } + if (m_param->analysisLoadReuseLevel > 4) { partSize = modeBuf + depthBytes; @@ -4954,7 +4996,7 @@ void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x { memset(&(analysis->interData)->depth[count], writeDepth, bytes); memset(&(analysis->interData)->modes[count], modeBuf[d], bytes); - if (m_param->rc.cuTree) + if (m_param->rc.cuTree && m_param->analysisLoadReuseLevel == 10) memset(&(analysis->interData)->cuQPOff[count], cuQPBuf[d], bytes); if (m_param->analysisLoadReuseLevel == 10 && bIntraInInter) memset(&(analysis->intraData)->chromaModes[count], chromaDir[d], bytes); @@ -5046,7 +5088,9 @@ void 
Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x } } else + { X265_FREAD((analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir, m_analysisFileIn, interPic->ref); + } consumedBytes += frameRecordSize; if (numDir == 1) @@ -5510,9 +5554,10 @@ void Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD analysis->frameRecordSize += analysis->numCUsInFrame * sizeof(sse_t); } + uint32_t reuseQPBufsize = 0; if (m_param->analysisSaveReuseLevel > 1) { - + reuseQPBufsize = MAX_NUM_CU_GEOMS * analysis->numCUsInFrame; if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I) { for (uint32_t cuAddr = 0; cuAddr < analysis->numCUsInFrame; cuAddr++) @@ -5536,12 +5581,21 @@ void Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD partSize = ctu->m_partSize[absPartIdx]; intraDataCTU->partSizes[depthBytes] = partSize; - if (m_param->rc.cuTree) + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel == 10) intraDataCTU->cuQPOff[depthBytes] = (int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP); absPartIdx += ctu->m_numPartitions >> (depth * 2); } + + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel < 10) + { + uint32_t nextCuIdx = (cuAddr + 1) * MAX_NUM_CU_GEOMS; + for (uint32_t i = cuAddr * MAX_NUM_CU_GEOMS; i < nextCuIdx; i++) + intraDataCTU->cuQPOff[i] = (int8_t)(intraDataCTU->cuQPOff[i] - baseQP); + } memcpy(&intraDataCTU->modes[ctu->m_cuAddr * ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)* ctu->m_numPartitions); } + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel == 10) + reuseQPBufsize = depthBytes; } else { @@ -5567,7 +5621,7 @@ void Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD predMode = 4; // used as indicator if the block is coded as bidir interDataCTU->modes[depthBytes] = predMode; - if (m_param->rc.cuTree) + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel 
== 10) interDataCTU->cuQPOff[depthBytes] = (int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP); if (m_param->analysisSaveReuseLevel > 4) @@ -5599,13 +5653,23 @@ void Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD } absPartIdx += ctu->m_numPartitions >> (depth * 2); } + + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel < 10) + { + uint32_t nextCuIdx = (cuAddr + 1) * MAX_NUM_CU_GEOMS; + for (uint32_t i = cuAddr * MAX_NUM_CU_GEOMS; i < nextCuIdx ; i++) + interDataCTU->cuQPOff[i] = (int8_t)(interDataCTU->cuQPOff[i] - baseQP); + } + if (m_param->analysisSaveReuseLevel == 10 && bIntraInInter) memcpy(&intraDataCTU->modes[ctu->m_cuAddr * ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)* ctu->m_numPartitions); } + if (m_param->rc.cuTree && m_param->analysisSaveReuseLevel == 10) + reuseQPBufsize = depthBytes; } if ((analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I) && m_param->rc.cuTree) - analysis->frameRecordSize += sizeof(uint8_t)* analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 + (sizeof(int8_t) * depthBytes); + analysis->frameRecordSize += sizeof(uint8_t)* analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 + (sizeof(int8_t) * reuseQPBufsize); else if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I) analysis->frameRecordSize += sizeof(uint8_t)* analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3; else @@ -5613,7 +5677,8 @@ void Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD /* Add sizeof depth, modes, partSize, cuQPOffset, mergeFlag */ analysis->frameRecordSize += depthBytes * 2; if (m_param->rc.cuTree) - analysis->frameRecordSize += (sizeof(int8_t) * depthBytes); + analysis->frameRecordSize += (sizeof(int8_t) * reuseQPBufsize); + if (m_param->analysisSaveReuseLevel > 4) analysis->frameRecordSize += (depthBytes * 2); @@ -5669,7 +5734,7 @@ void Encoder::writeAnalysisFile(x265_analysis_data* 
analysis, FrameData &curEncD X265_FWRITE((analysis->intraData)->chromaModes, sizeof(uint8_t), depthBytes, m_analysisFileOut); X265_FWRITE((analysis->intraData)->partSizes, sizeof(char), depthBytes, m_analysisFileOut); if (m_param->rc.cuTree) - X265_FWRITE((analysis->intraData)->cuQPOff, sizeof(int8_t), depthBytes, m_analysisFileOut); + X265_FWRITE((analysis->intraData)->cuQPOff, sizeof(int8_t), reuseQPBufsize, m_analysisFileOut); X265_FWRITE((analysis->intraData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFileOut); } else @@ -5677,7 +5742,7 @@ void Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD X265_FWRITE((analysis->interData)->depth, sizeof(uint8_t), depthBytes, m_analysisFileOut); X265_FWRITE((analysis->interData)->modes, sizeof(uint8_t), depthBytes, m_analysisFileOut); if (m_param->rc.cuTree) - X265_FWRITE((analysis->interData)->cuQPOff, sizeof(int8_t), depthBytes, m_analysisFileOut); + X265_FWRITE((analysis->interData)->cuQPOff, sizeof(int8_t), reuseQPBufsize, m_analysisFileOut); if (m_param->analysisSaveReuseLevel > 4) { X265_FWRITE((analysis->interData)->partSize, sizeof(uint8_t), depthBytes, m_analysisFileOut); @@ -5762,7 +5827,7 @@ void Encoder::writeAnalysisFileRefine(x265_analysis_data* analysis, FrameData &c interData->mv[1][depthBytes].word = ctu->m_mv[1][absPartIdx].word; interData->mvpIdx[1][depthBytes] = ctu->m_mvpIdx[1][absPartIdx]; ref[1][depthBytes] = ctu->m_refIdx[1][absPartIdx]; - predMode = 4; // used as indiacator if the block is coded as bidir + predMode = 4; // used as indicator if the block is coded as bidir } interData->modes[depthBytes] = predMode; diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp index 0adb0d0db..3bc01268b 100644 --- a/source/encoder/slicetype.cpp +++ b/source/encoder/slicetype.cpp @@ -1894,7 +1894,7 @@ void Lookahead::slicetypeAnalyse(Lowres **frames, bool bKeyframe) if (!framecnt) { - if (m_param->rc.cuTree) + if 
(m_param->rc.cuTree && !m_param->analysisLoad) cuTree(frames, 0, bKeyframe); return; } diff --git a/source/x265.h b/source/x265.h index f44040ba7..8d7a75826 100644 --- a/source/x265.h +++ b/source/x265.h @@ -144,7 +144,7 @@ typedef struct x265_analysis_intra_data uint8_t* modes; char* partSizes; uint8_t* chromaModes; - int8_t* cuQPOff; + int8_t* cuQPOff; }x265_analysis_intra_data; typedef struct x265_analysis_MV @@ -167,7 +167,7 @@ typedef struct x265_analysis_inter_data uint8_t* interDir; uint8_t* mvpIdx[2]; int8_t* refIdx[2]; - x265_analysis_MV* mv[2]; + x265_analysis_MV* mv[2]; int64_t* sadCost; int8_t* cuQPOff; }x265_analysis_inter_data; -- 2.20.1.windows.1 -- *With Regards,* *Srikanth Kurapati.*
0001-fix-corrects-output-mismatch-for-cutree-enabled-anal.patch
Description: Binary data
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel