# HG changeset patch # User Santhoshini Sekar<santhosh...@multicorewareinc.com> # Date 1464152658 -19800 # Wed May 25 10:34:18 2016 +0530 # Node ID 028ccaa6486047ffafef38d78f9b00880908c0c3 # Parent 6d3849d648f0be5a8e334f1d75a2f7cf93c86cb3 Allow different Csp for picture
diff -r 6d3849d648f0 -r 028ccaa64860 source/common/cudata.cpp --- a/source/common/cudata.cpp Sun May 29 21:50:25 2016 +0800 +++ b/source/common/cudata.cpp Wed May 25 10:34:18 2016 +0530 @@ -527,7 +527,7 @@ } /* Only called by encodeResidue, these fields can be modified during inter/intra coding */ -void CUData::updatePic(uint32_t depth) const +void CUData::updatePic(uint32_t depth, int picCsp) const { CUData& ctu = *m_encData->getPicCTU(m_cuAddr); @@ -541,7 +541,7 @@ uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2); memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t)* tmpY); - if (ctu.m_chromaFormat != X265_CSP_I400) + if (ctu.m_chromaFormat != X265_CSP_I400 && picCsp != X265_CSP_I400) { m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]); m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]); diff -r 6d3849d648f0 -r 028ccaa64860 source/common/cudata.h --- a/source/common/cudata.h Sun May 29 21:50:25 2016 +0800 +++ b/source/common/cudata.h Wed May 25 10:34:18 2016 +0530 @@ -224,7 +224,7 @@ /* RD-0 methods called only from encodeResidue */ void copyFromPic(const CUData& ctu, const CUGeom& cuGeom, int csp, bool copyQp = true); - void updatePic(uint32_t depth) const; + void updatePic(uint32_t depth, int picCsp) const; void setPartSizeSubParts(PartSize size) { m_partSet(m_partSize, (uint8_t)size); } void setPredModeSubParts(PredMode mode) { m_partSet(m_predMode, (uint8_t)mode); } diff -r 6d3849d648f0 -r 028ccaa64860 source/common/frame.cpp --- a/source/common/frame.cpp Sun May 29 21:50:25 2016 +0800 +++ b/source/common/frame.cpp Wed May 25 10:34:18 2016 +0530 @@ -72,7 +72,7 @@ m_reconPic = new PicYuv; m_param = param; m_encData->m_reconPic = m_reconPic; - bool ok = m_encData->create(*param, sps) && m_reconPic->create(param->sourceWidth, param->sourceHeight, param->internalCsp); + bool ok = m_encData->create(*param, sps, m_fencPic->m_picCsp) && m_reconPic->create(param->sourceWidth, param->sourceHeight, 
param->internalCsp); if (ok) { /* initialize right border of m_reconpicYuv as SAO may read beyond the diff -r 6d3849d648f0 -r 028ccaa64860 source/common/framedata.cpp --- a/source/common/framedata.cpp Sun May 29 21:50:25 2016 +0800 +++ b/source/common/framedata.cpp Wed May 25 10:34:18 2016 +0530 @@ -31,11 +31,12 @@ memset(this, 0, sizeof(*this)); } -bool FrameData::create(const x265_param& param, const SPS& sps) +bool FrameData::create(const x265_param& param, const SPS& sps, int csp) { m_param = &param; m_slice = new Slice; m_picCTU = new CUData[sps.numCUsInFrame]; + m_picCsp = csp; m_cuMemPool.create(0, param.internalCsp, sps.numCUsInFrame); for (uint32_t ctuAddr = 0; ctuAddr < sps.numCUsInFrame; ctuAddr++) diff -r 6d3849d648f0 -r 028ccaa64860 source/common/framedata.h --- a/source/common/framedata.h Sun May 29 21:50:25 2016 +0800 +++ b/source/common/framedata.h Wed May 25 10:34:18 2016 +0530 @@ -146,10 +146,11 @@ double m_avgQpRc; /* avg QP as decided by rate-control */ double m_avgQpAq; /* avg QP as decided by AQ in addition to rate-control */ double m_rateFactor; /* calculated based on the Frame QP */ + int m_picCsp; FrameData(); - bool create(const x265_param& param, const SPS& sps); + bool create(const x265_param& param, const SPS& sps, int csp); void reinit(const SPS& sps); void destroy(); inline CUData* getPicCTU(uint32_t ctuAddr) { return &m_picCTU[ctuAddr]; } diff -r 6d3849d648f0 -r 028ccaa64860 source/common/picyuv.cpp --- a/source/common/picyuv.cpp Sun May 29 21:50:25 2016 +0800 +++ b/source/common/picyuv.cpp Wed May 25 10:34:18 2016 +0530 @@ -180,6 +180,7 @@ * warnings from valgrind about using uninitialized pixels */ padx++; pady++; + m_picCsp = pic.colorSpace; X265_CHECK(pic.bitDepth >= 8, "pic.bitDepth check failure"); @@ -194,7 +195,7 @@ primitives.planecopy_cp(yChar, pic.stride[0] / sizeof(*yChar), yPixel, m_stride, width, height, shift); - if (pic.colorSpace != X265_CSP_I400) + if (param.internalCsp != X265_CSP_I400) { pixel *uPixel = m_picOrg[1]; 
pixel *vPixel = m_picOrg[2]; @@ -220,7 +221,7 @@ yChar += pic.stride[0] / sizeof(*yChar); } - if (pic.colorSpace != X265_CSP_I400) + if (param.internalCsp != X265_CSP_I400) { pixel *uPixel = m_picOrg[1]; pixel *vPixel = m_picOrg[2]; @@ -262,7 +263,7 @@ primitives.planecopy_sp_shl(yShort, pic.stride[0] / sizeof(*yShort), yPixel, m_stride, width, height, shift, mask); } - if (pic.colorSpace != X265_CSP_I400) + if (param.internalCsp != X265_CSP_I400) { pixel *uPixel = m_picOrg[1]; pixel *vPixel = m_picOrg[2]; @@ -312,7 +313,7 @@ for (int i = 1; i <= pady; i++) memcpy(Y + i * m_stride, Y, (width + padx) * sizeof(pixel)); - if (pic.colorSpace != X265_CSP_I400) + if (param.internalCsp != X265_CSP_I400) { for (int r = 0; r < height >> m_vChromaShift; r++) { diff -r 6d3849d648f0 -r 028ccaa64860 source/common/shortyuv.cpp --- a/source/common/shortyuv.cpp Sun May 29 21:50:25 2016 +0800 +++ b/source/common/shortyuv.cpp Wed May 25 10:34:18 2016 +0530 @@ -78,11 +78,11 @@ memset(m_buf[2], 0, (m_csize * m_csize) * sizeof(int16_t)); } -void ShortYuv::subtract(const Yuv& srcYuv0, const Yuv& srcYuv1, uint32_t log2Size) +void ShortYuv::subtract(const Yuv& srcYuv0, const Yuv& srcYuv1, uint32_t log2Size, int picCsp) { const int sizeIdx = log2Size - 2; primitives.cu[sizeIdx].sub_ps(m_buf[0], m_size, srcYuv0.m_buf[0], srcYuv1.m_buf[0], srcYuv0.m_size, srcYuv1.m_size); - if (m_csp != X265_CSP_I400) + if (m_csp != X265_CSP_I400 && picCsp != X265_CSP_I400) { primitives.chroma[m_csp].cu[sizeIdx].sub_ps(m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize); primitives.chroma[m_csp].cu[sizeIdx].sub_ps(m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize); diff -r 6d3849d648f0 -r 028ccaa64860 source/common/shortyuv.h --- a/source/common/shortyuv.h Sun May 29 21:50:25 2016 +0800 +++ b/source/common/shortyuv.h Wed May 25 10:34:18 2016 +0530 @@ -64,7 +64,7 @@ const int16_t* getCrAddr(uint32_t absPartIdx) const { return m_buf[2] 
+ getChromaAddrOffset(absPartIdx); } const int16_t* getChromaAddr(uint32_t chromaId, uint32_t partUnitIdx) const { return m_buf[chromaId] + getChromaAddrOffset(partUnitIdx); } - void subtract(const Yuv& srcYuv0, const Yuv& srcYuv1, uint32_t log2Size); + void subtract(const Yuv& srcYuv0, const Yuv& srcYuv1, uint32_t log2Size, int picCsp); void copyPartToPartLuma(ShortYuv& dstYuv, uint32_t absPartIdx, uint32_t log2Size) const; void copyPartToPartChroma(ShortYuv& dstYuv, uint32_t absPartIdx, uint32_t log2SizeL) const; diff -r 6d3849d648f0 -r 028ccaa64860 source/common/yuv.cpp --- a/source/common/yuv.cpp Sun May 29 21:50:25 2016 +0800 +++ b/source/common/yuv.cpp Wed May 25 10:34:18 2016 +0530 @@ -163,14 +163,19 @@ } } -void Yuv::addClip(const Yuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t log2SizeL) +void Yuv::addClip(const Yuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t log2SizeL, int picCsp) { primitives.cu[log2SizeL - 2].add_ps(m_buf[0], m_size, srcYuv0.m_buf[0], srcYuv1.m_buf[0], srcYuv0.m_size, srcYuv1.m_size); - if (m_csp != X265_CSP_I400) + if (m_csp != X265_CSP_I400 && picCsp != X265_CSP_I400) { primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps(m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv1.m_buf[1], srcYuv0.m_csize, srcYuv1.m_csize); primitives.chroma[m_csp].cu[log2SizeL - 2].add_ps(m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv1.m_buf[2], srcYuv0.m_csize, srcYuv1.m_csize); } + if (picCsp == X265_CSP_I400 && m_csp != X265_CSP_I400) + { + primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[1], m_csize, srcYuv0.m_buf[1], srcYuv0.m_csize); + primitives.chroma[m_csp].cu[m_part].copy_pp(m_buf[2], m_csize, srcYuv0.m_buf[2], srcYuv0.m_csize); + } } void Yuv::addAvg(const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t absPartIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma) diff -r 6d3849d648f0 -r 028ccaa64860 source/common/yuv.h --- a/source/common/yuv.h Sun May 29 21:50:25 2016 +0800 +++ b/source/common/yuv.h Wed May 25 10:34:18 2016 +0530 @@ -73,7 
+73,7 @@ void copyPartToYuv(Yuv& dstYuv, uint32_t absPartIdx) const; // Clip(srcYuv0 + srcYuv1) -> m_buf .. aka recon = clip(pred + residual) - void addClip(const Yuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t log2SizeL); + void addClip(const Yuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t log2SizeL, int picCsp); // (srcYuv0 + srcYuv1)/2 for YUV partition (bidir averaging) void addAvg(const ShortYuv& srcYuv0, const ShortYuv& srcYuv1, uint32_t absPartIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma); diff -r 6d3849d648f0 -r 028ccaa64860 source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Sun May 29 21:50:25 2016 +0800 +++ b/source/encoder/analysis.cpp Wed May 25 10:34:18 2016 +0530 @@ -1194,7 +1194,7 @@ if (m_param->rdLevel >= 3) { /* Calculate RD cost of best inter option */ - if (!m_bChromaSa8d && (m_csp != X265_CSP_I400)) /* When m_bChromaSa8d is enabled, chroma MC has already been done */ + if ((!m_bChromaSa8d && (m_csp != X265_CSP_I400)) || (m_frame->m_fencPic->m_picCsp == X265_CSP_I400 && m_csp != X265_CSP_I400)) /* When m_bChromaSa8d is enabled, chroma MC has already been done */ { uint32_t numPU = bestInter->cu.getNumPartInter(0); for (uint32_t puIdx = 0; puIdx < numPU; puIdx++) @@ -1213,6 +1213,13 @@ if (m_slice->m_sliceType == B_SLICE && md.pred[PRED_BIDIR].sa8dCost != MAX_INT64 && md.pred[PRED_BIDIR].sa8dCost * 16 <= bestInter->sa8dCost * 17) { + uint32_t numPU = md.pred[PRED_BIDIR].cu.getNumPartInter(0); + if (m_frame->m_fencPic->m_picCsp == X265_CSP_I400 && m_csp != X265_CSP_I400) + for (uint32_t puIdx = 0; puIdx < numPU; puIdx++) + { + PredictionUnit pu(md.pred[PRED_BIDIR].cu, cuGeom, puIdx); + motionCompensation(md.pred[PRED_BIDIR].cu, pu, md.pred[PRED_BIDIR].predYuv, true, true); + } encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR], cuGeom); checkBestMode(md.pred[PRED_BIDIR], depth); } @@ -1290,10 +1297,10 @@ uint32_t tuDepthRange[2]; cu.getInterTUQtDepthRange(tuDepthRange, 0); - 
m_rqt[cuGeom.depth].tmpResiYuv.subtract(*md.bestMode->fencYuv, md.bestMode->predYuv, cuGeom.log2CUSize); + m_rqt[cuGeom.depth].tmpResiYuv.subtract(*md.bestMode->fencYuv, md.bestMode->predYuv, cuGeom.log2CUSize, m_frame->m_fencPic->m_picCsp); residualTransformQuantInter(*md.bestMode, cuGeom, 0, 0, tuDepthRange); if (cu.getQtRootCbf(0)) - md.bestMode->reconYuv.addClip(md.bestMode->predYuv, m_rqt[cuGeom.depth].tmpResiYuv, cu.m_log2CUSize[0]); + md.bestMode->reconYuv.addClip(md.bestMode->predYuv, m_rqt[cuGeom.depth].tmpResiYuv, cu.m_log2CUSize[0], m_frame->m_fencPic->m_picCsp); else { md.bestMode->reconYuv.copyFromYuv(md.bestMode->predYuv); @@ -1524,6 +1531,13 @@ checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], cuGeom); if (md.pred[PRED_BIDIR].sa8dCost < MAX_INT64) { + uint32_t numPU = md.pred[PRED_BIDIR].cu.getNumPartInter(0); + if (m_frame->m_fencPic->m_picCsp == X265_CSP_I400 && m_csp != X265_CSP_I400) + for (uint32_t puIdx = 0; puIdx < numPU; puIdx++) + { + PredictionUnit pu(md.pred[PRED_BIDIR].cu, cuGeom, puIdx); + motionCompensation(md.pred[PRED_BIDIR].cu, pu, md.pred[PRED_BIDIR].predYuv, true, true); + } encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR], cuGeom); checkBestMode(md.pred[PRED_BIDIR], cuGeom.depth); } @@ -1701,6 +1715,17 @@ ProfileCounter(parentCTU, skippedIntraCU[cuGeom.depth]); } } + if ((md.bestMode->cu.isInter(0) && !(md.bestMode->cu.m_mergeFlag[0] && md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N)) && (m_frame->m_fencPic->m_picCsp == X265_CSP_I400 && m_csp != X265_CSP_I400)) + { + uint32_t numPU = md.bestMode->cu.getNumPartInter(0); + + for (uint32_t puIdx = 0; puIdx < numPU; puIdx++) + { + PredictionUnit pu(md.bestMode->cu, cuGeom, puIdx); + motionCompensation(md.bestMode->cu, pu, md.bestMode->predYuv, false, m_csp != X265_CSP_I400); + } + encodeResAndCalcRdInterCU(*md.bestMode, cuGeom); + } } if (m_bTryLossless) @@ -1912,11 +1937,11 @@ tempPred->cu.m_mv[1][0] = candMvField[i][1].mv; tempPred->cu.m_refIdx[0][0] = 
(int8_t)candMvField[i][0].refIdx; tempPred->cu.m_refIdx[1][0] = (int8_t)candMvField[i][1].refIdx; - motionCompensation(tempPred->cu, pu, tempPred->predYuv, true, m_bChromaSa8d && (m_csp != X265_CSP_I400)); + motionCompensation(tempPred->cu, pu, tempPred->predYuv, true, m_bChromaSa8d && (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)); tempPred->sa8dBits = getTUBits(i, numMergeCand); tempPred->distortion = primitives.cu[sizeIdx].sa8d(fencYuv->m_buf[0], fencYuv->m_size, tempPred->predYuv.m_buf[0], tempPred->predYuv.m_size); - if (m_bChromaSa8d && (m_csp != X265_CSP_I400)) + if (m_bChromaSa8d && (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)) { tempPred->distortion += primitives.chroma[m_csp].cu[sizeIdx].sa8d(fencYuv->m_buf[1], fencYuv->m_csize, tempPred->predYuv.m_buf[1], tempPred->predYuv.m_csize); tempPred->distortion += primitives.chroma[m_csp].cu[sizeIdx].sa8d(fencYuv->m_buf[2], fencYuv->m_csize, tempPred->predYuv.m_buf[2], tempPred->predYuv.m_csize); @@ -1935,7 +1960,7 @@ return; /* calculate the motion compensation for chroma for the best mode selected */ - if (!m_bChromaSa8d && (m_csp != X265_CSP_I400)) /* Chroma MC was done above */ + if ((!m_bChromaSa8d && (m_csp != X265_CSP_I400)) || (m_frame->m_fencPic->m_picCsp == X265_CSP_I400 && m_csp != X265_CSP_I400)) /* Chroma MC was done above */ motionCompensation(bestPred->cu, pu, bestPred->predYuv, false, true); if (m_param->rdLevel) @@ -2118,14 +2143,14 @@ bestME[i].ref = m_reuseRef[refOffset + index++]; } } - predInterSearch(interMode, cuGeom, m_bChromaSa8d && (m_csp != X265_CSP_I400), refMask); + predInterSearch(interMode, cuGeom, m_bChromaSa8d && (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400), refMask); /* predInterSearch sets interMode.sa8dBits */ const Yuv& fencYuv = *interMode.fencYuv; Yuv& predYuv = interMode.predYuv; int part = partitionFromLog2Size(cuGeom.log2CUSize); interMode.distortion = 
primitives.cu[part].sa8d(fencYuv.m_buf[0], fencYuv.m_size, predYuv.m_buf[0], predYuv.m_size); - if (m_bChromaSa8d && (m_csp != X265_CSP_I400)) + if (m_bChromaSa8d && (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)) { interMode.distortion += primitives.chroma[m_csp].cu[part].sa8d(fencYuv.m_buf[1], fencYuv.m_csize, predYuv.m_buf[1], predYuv.m_csize); interMode.distortion += primitives.chroma[m_csp].cu[part].sa8d(fencYuv.m_buf[2], fencYuv.m_csize, predYuv.m_buf[2], predYuv.m_csize); @@ -2167,7 +2192,7 @@ bestME[i].ref = m_reuseRef[refOffset + index++]; } } - predInterSearch(interMode, cuGeom, m_csp != X265_CSP_I400, refMask); + predInterSearch(interMode, cuGeom, m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400, refMask); /* predInterSearch sets interMode.sa8dBits, but this is ignored */ encodeResAndCalcRdInterCU(interMode, cuGeom); @@ -2230,10 +2255,10 @@ cu.m_mvd[1][0] = bestME[1].mv - mvp1; PredictionUnit pu(cu, cuGeom, 0); - motionCompensation(cu, pu, bidir2Nx2N.predYuv, true, m_bChromaSa8d && (m_csp != X265_CSP_I400)); + motionCompensation(cu, pu, bidir2Nx2N.predYuv, true, m_bChromaSa8d && (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)); int sa8d = primitives.cu[partEnum].sa8d(fencYuv.m_buf[0], fencYuv.m_size, bidir2Nx2N.predYuv.m_buf[0], bidir2Nx2N.predYuv.m_size); - if (m_bChromaSa8d && (m_csp != X265_CSP_I400)) + if (m_bChromaSa8d && (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)) { /* Add in chroma distortion */ sa8d += primitives.chroma[m_csp].cu[partEnum].sa8d(fencYuv.m_buf[1], fencYuv.m_csize, bidir2Nx2N.predYuv.m_buf[1], bidir2Nx2N.predYuv.m_csize); @@ -2264,7 +2289,7 @@ int zsa8d; - if (m_bChromaSa8d && (m_csp != X265_CSP_I400)) + if (m_bChromaSa8d && (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)) { cu.m_mv[0][0] = mvzero; cu.m_mv[1][0] = mvzero; @@ -2312,9 +2337,9 @@ if (m_bChromaSa8d) /* real MC was already performed */ 
bidir2Nx2N.predYuv.copyFromYuv(tmpPredYuv); else - motionCompensation(cu, pu, bidir2Nx2N.predYuv, true, m_csp != X265_CSP_I400); + motionCompensation(cu, pu, bidir2Nx2N.predYuv, true, m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400); } - else if (m_bChromaSa8d && (m_csp != X265_CSP_I400)) + else if (m_bChromaSa8d && (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)) { /* recover overwritten motion vectors */ cu.m_mv[0][0] = bestME[0].mv; @@ -2360,7 +2385,7 @@ cu.getIntraTUQtDepthRange(tuDepthRange, 0); residualTransformQuantIntra(*bestMode, cuGeom, 0, 0, tuDepthRange); - if (m_csp != X265_CSP_I400) + if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400) { getBestIntraModeChroma(*bestMode, cuGeom); residualQTIntraChroma(*bestMode, cuGeom, 0, 0); @@ -2384,7 +2409,7 @@ fencYuv.m_buf[0], predY, fencYuv.m_size, predYuv.m_size); - if (m_csp != X265_CSP_I400) + if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400) { pixel* predU = predYuv.getCbAddr(absPartIdx); pixel* predV = predYuv.getCrAddr(absPartIdx); @@ -2414,7 +2439,7 @@ else primitives.cu[sizeIdx].copy_pp(reconPic.getLumaAddr(cu.m_cuAddr, absPartIdx), reconPic.m_stride, predY, predYuv.m_size); - if (m_csp != X265_CSP_I400) + if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400) { pixel* predU = predYuv.getCbAddr(absPartIdx); pixel* predV = predYuv.getCrAddr(absPartIdx); @@ -2434,7 +2459,7 @@ } } - cu.updatePic(cuGeom.depth); + cu.updatePic(cuGeom.depth, m_frame->m_fencPic->m_picCsp); } void Analysis::addSplitFlagCost(Mode& mode, uint32_t depth) diff -r 6d3849d648f0 -r 028ccaa64860 source/encoder/encoder.cpp --- a/source/encoder/encoder.cpp Sun May 29 21:50:25 2016 +0800 +++ b/source/encoder/encoder.cpp Wed May 25 10:34:18 2016 +0530 @@ -511,12 +511,6 @@ if (pic_in) { - if (pic_in->colorSpace != m_param->internalCsp) - { - x265_log(m_param, X265_LOG_ERROR, "Unsupported chroma subsampling (%d) on 
input\n", - pic_in->colorSpace); - return -1; - } if (pic_in->bitDepth < 8 || pic_in->bitDepth > 16) { x265_log(m_param, X265_LOG_ERROR, "Input bit depth (%d) must be between 8 and 16\n", @@ -538,7 +532,7 @@ { inFrame->m_fencPic->m_cuOffsetY = m_sps.cuOffsetY; inFrame->m_fencPic->m_buOffsetY = m_sps.buOffsetY; - if (pic_in->colorSpace != X265_CSP_I400) + if (m_param->internalCsp != X265_CSP_I400) { inFrame->m_fencPic->m_cuOffsetC = m_sps.cuOffsetC; inFrame->m_fencPic->m_buOffsetC = m_sps.buOffsetC; @@ -558,7 +552,7 @@ { m_sps.cuOffsetY = inFrame->m_fencPic->m_cuOffsetY; m_sps.buOffsetY = inFrame->m_fencPic->m_buOffsetY; - if (pic_in->colorSpace != X265_CSP_I400) + if (m_param->internalCsp != X265_CSP_I400) { m_sps.cuOffsetC = inFrame->m_fencPic->m_cuOffsetC; m_sps.cuOffsetY = inFrame->m_fencPic->m_cuOffsetY; diff -r 6d3849d648f0 -r 028ccaa64860 source/encoder/motion.cpp --- a/source/encoder/motion.cpp Sun May 29 21:50:25 2016 +0800 +++ b/source/encoder/motion.cpp Wed May 25 10:34:18 2016 +0530 @@ -183,7 +183,7 @@ } /* Called by Search::predInterSearch() or --pme equivalent, chroma residual might be considered */ -void MotionEstimate::setSourcePU(const Yuv& srcFencYuv, int _ctuAddr, int cuPartIdx, int puPartIdx, int pwidth, int pheight, const int method, const int refine) +void MotionEstimate::setSourcePU(const Yuv& srcFencYuv, int _ctuAddr, int cuPartIdx, int puPartIdx, int pwidth, int pheight, const int method, const int refine, bool bChroma) { partEnum = partitionFromSizes(pwidth, pheight); X265_CHECK(LUMA_4x4 != partEnum, "4x4 inter partition detected!\n"); @@ -200,7 +200,7 @@ /* Enable chroma residual cost if subpelRefine level is greater than 2 and chroma block size * is an even multiple of 4x4 pixels (indicated by non-null chromaSatd pointer) */ - bChromaSATD = subpelRefine > 2 && chromaSatd && (srcFencYuv.m_csp != X265_CSP_I400); + bChromaSATD = subpelRefine > 2 && chromaSatd && (srcFencYuv.m_csp != X265_CSP_I400 && bChroma); X265_CHECK(!(bChromaSATD && 
!workload[subpelRefine].hpel_satd), "Chroma SATD cannot be used with SAD hpel\n"); ctuAddr = _ctuAddr; diff -r 6d3849d648f0 -r 028ccaa64860 source/encoder/motion.h --- a/source/encoder/motion.h Sun May 29 21:50:25 2016 +0800 +++ b/source/encoder/motion.h Wed May 25 10:34:18 2016 +0530 @@ -75,7 +75,7 @@ /* Methods called at slice setup */ void setSourcePU(pixel *fencY, intptr_t stride, intptr_t offset, int pwidth, int pheight, const int searchMethod, const int subpelRefine); - void setSourcePU(const Yuv& srcFencYuv, int ctuAddr, int cuPartIdx, int puPartIdx, int pwidth, int pheight, const int searchMethod, const int subpelRefine); + void setSourcePU(const Yuv& srcFencYuv, int ctuAddr, int cuPartIdx, int puPartIdx, int pwidth, int pheight, const int searchMethod, const int subpelRefine, bool bChroma); /* buf*() and motionEstimate() methods all use cached fenc pixels and thus * require setSourcePU() to be called prior. */ diff -r 6d3849d648f0 -r 028ccaa64860 source/encoder/reference.cpp --- a/source/encoder/reference.cpp Sun May 29 21:50:25 2016 +0800 +++ b/source/encoder/reference.cpp Wed May 25 10:34:18 2016 +0530 @@ -68,7 +68,7 @@ intptr_t stride = reconPic->m_stride; int cuHeight = g_maxCUSize; - for (int c = 0; c < (p.internalCsp != X265_CSP_I400 ? numInterpPlanes : 1); c++) + for (int c = 0; c < (p.internalCsp != X265_CSP_I400 && recPic->m_picCsp != X265_CSP_I400 ? 
numInterpPlanes : 1); c++) { if (c == 1) { diff -r 6d3849d648f0 -r 028ccaa64860 source/encoder/sao.cpp --- a/source/encoder/sao.cpp Sun May 29 21:50:25 2016 +0800 +++ b/source/encoder/sao.cpp Wed May 25 10:34:18 2016 +0530 @@ -255,7 +255,7 @@ } saoParam->bSaoFlag[0] = true; - saoParam->bSaoFlag[1] = m_param->internalCsp != X265_CSP_I400; + saoParam->bSaoFlag[1] = m_param->internalCsp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400; m_numNoSao[0] = 0; // Luma m_numNoSao[1] = 0; // Chroma @@ -935,7 +935,7 @@ memset(m_offsetOrgPreDblk[addr], 0, sizeof(PerPlane)); int plane_offset = 0; - for (int plane = 0; plane < (frame->m_param->internalCsp != X265_CSP_I400 ? NUM_PLANE : 1); plane++) + for (int plane = 0; plane < (frame->m_param->internalCsp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400? NUM_PLANE : 1); plane++) { if (plane == 1) { @@ -1208,7 +1208,7 @@ const int addrMerge[2] = {(idxX ? addr - 1 : -1), (rowBaseAddr ? addr - m_numCuInWidth : -1)};// left, up - bool chroma = m_param->internalCsp != X265_CSP_I400; + bool chroma = m_param->internalCsp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400; int planes = chroma ? 
3 : 1; // reset stats Y, Cb, Cr diff -r 6d3849d648f0 -r 028ccaa64860 source/encoder/search.cpp --- a/source/encoder/search.cpp Sun May 29 21:50:25 2016 +0800 +++ b/source/encoder/search.cpp Wed May 25 10:34:18 2016 +0530 @@ -1727,6 +1727,12 @@ else cu.getAllowedChromaDir(absPartIdxC, modeList); + if (m_frame->m_fencPic->m_picCsp == X265_CSP_I400 && m_csp != X265_CSP_I400) + { + for (uint32_t l = 1; l < NUM_CHROMA_MODE; l++) + modeList[l] = modeList[0]; + maxMode = 1; + } // check chroma modes for (uint32_t mode = minMode; mode < maxMode; mode++) { @@ -1966,7 +1972,8 @@ slave.m_frame = m_frame; slave.m_param = m_param; slave.setLambdaFromQP(pme.mode.cu, m_rdCost.m_qp); - slave.m_me.setSourcePU(*pme.mode.fencYuv, pme.pu.ctuAddr, pme.pu.cuAbsPartIdx, pme.pu.puAbsPartIdx, pme.pu.width, pme.pu.height, m_param->searchMethod, m_param->subpelRefine); + bool bChroma = slave.m_frame->m_fencPic->m_picCsp != X265_CSP_I400; + slave.m_me.setSourcePU(*pme.mode.fencYuv, pme.pu.ctuAddr, pme.pu.cuAbsPartIdx, pme.pu.puAbsPartIdx, pme.pu.width, pme.pu.height, m_param->searchMethod, m_param->subpelRefine, bChroma); } /* Perform ME, repeat until no more work is available */ @@ -2069,7 +2076,7 @@ MotionData* bestME = interMode.bestME[puIdx]; PredictionUnit pu(cu, cuGeom, puIdx); - m_me.setSourcePU(*interMode.fencYuv, pu.ctuAddr, pu.cuAbsPartIdx, pu.puAbsPartIdx, pu.width, pu.height, m_param->searchMethod, m_param->subpelRefine); + m_me.setSourcePU(*interMode.fencYuv, pu.ctuAddr, pu.cuAbsPartIdx, pu.puAbsPartIdx, pu.width, pu.height, m_param->searchMethod, m_param->subpelRefine, bChromaMC); /* find best cost merge candidate. note: 2Nx2N merge and bidir are handled as separate modes */ uint32_t mrgCost = numPart == 1 ? 
MAX_UINT : mergeEstimation(cu, cuGeom, pu, puIdx, merge); @@ -2529,7 +2536,7 @@ interMode.lumaDistortion = primitives.cu[part].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size); interMode.distortion = interMode.lumaDistortion; // Chroma - if (m_csp != X265_CSP_I400) + if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400) { interMode.chromaDistortion = m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[part].sse_pp(fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize)); interMode.chromaDistortion += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[part].sse_pp(fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize)); @@ -2570,7 +2577,7 @@ uint32_t log2CUSize = cuGeom.log2CUSize; int sizeIdx = log2CUSize - 2; - resiYuv->subtract(*fencYuv, *predYuv, log2CUSize); + resiYuv->subtract(*fencYuv, *predYuv, log2CUSize, m_frame->m_fencPic->m_picCsp); uint32_t tuDepthRange[2]; cu.getInterTUQtDepthRange(tuDepthRange, 0); @@ -2584,7 +2591,7 @@ if (!tqBypass) { sse_t cbf0Dist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size); - if (m_csp != X265_CSP_I400) + if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400) { cbf0Dist += m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], predYuv->m_csize, predYuv->m_buf[1], predYuv->m_csize)); cbf0Dist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2], predYuv->m_csize, predYuv->m_buf[2], predYuv->m_csize)); @@ -2655,14 +2662,14 @@ m_entropyCoder.store(interMode.contexts); if (cu.getQtRootCbf(0)) - reconYuv->addClip(*predYuv, *resiYuv, log2CUSize); + reconYuv->addClip(*predYuv, *resiYuv, log2CUSize, m_frame->m_fencPic->m_picCsp); else reconYuv->copyFromYuv(*predYuv); // update with clipped distortion and cost (qp estimation loop uses unclipped values) sse_t bestLumaDist = 
primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size); interMode.distortion = bestLumaDist; - if (m_csp != X265_CSP_I400) + if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400) { sse_t bestChromaDist = m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize)); bestChromaDist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize)); @@ -2694,7 +2701,7 @@ { // code full block uint32_t log2TrSizeC = log2TrSize - m_hChromaShift; - uint32_t codeChroma = (m_csp != X265_CSP_I400) ? 1 : 0; + uint32_t codeChroma = (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400) ? 1 : 0; uint32_t tuDepthC = tuDepth; if (log2TrSizeC < 2) @@ -2802,14 +2809,14 @@ { residualTransformQuantInter(mode, cuGeom, qPartIdx, tuDepth + 1, depthRange); ycbf |= cu.getCbf(qPartIdx, TEXT_LUMA, tuDepth + 1); - if (m_csp != X265_CSP_I400) + if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400) { ucbf |= cu.getCbf(qPartIdx, TEXT_CHROMA_U, tuDepth + 1); vcbf |= cu.getCbf(qPartIdx, TEXT_CHROMA_V, tuDepth + 1); } } cu.m_cbf[0][absPartIdx] |= ycbf << tuDepth; - if (m_csp != X265_CSP_I400) + if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400) { cu.m_cbf[1][absPartIdx] |= ucbf << tuDepth; cu.m_cbf[2][absPartIdx] |= vcbf << tuDepth; @@ -2844,7 +2851,7 @@ X265_CHECK(bCheckFull || bCheckSplit, "check-full or check-split must be set\n"); uint32_t log2TrSizeC = log2TrSize - m_hChromaShift; - uint32_t codeChroma = (m_csp != X265_CSP_I400) ? 1 : 0; + uint32_t codeChroma = (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400) ? 
1 : 0; uint32_t tuDepthC = tuDepth; if (log2TrSizeC < 2) { @@ -3095,6 +3102,19 @@ } } + if (m_frame->m_fencPic->m_picCsp == X265_CSP_I400 && m_csp != X265_CSP_I400) + { + for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++) + { + TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, absPartIdxStep, absPartIdx); + do + { + uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU; + cu.setCbfPartRange(0, (TextType)chromaId, absPartIdxC, tuIterator.absPartIdxStep); + } + while(tuIterator.isNextSection()); + } + } if (checkTransformSkipY) { sse_t nonZeroDistY = 0; @@ -3304,14 +3324,14 @@ { estimateResidualQT(mode, cuGeom, qPartIdx, tuDepth + 1, resiYuv, splitCost, depthRange); ycbf |= cu.getCbf(qPartIdx, TEXT_LUMA, tuDepth + 1); - if (m_csp != X265_CSP_I400) + if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400) { ucbf |= cu.getCbf(qPartIdx, TEXT_CHROMA_U, tuDepth + 1); vcbf |= cu.getCbf(qPartIdx, TEXT_CHROMA_V, tuDepth + 1); } } cu.m_cbf[0][absPartIdx] |= ycbf << tuDepth; - if (m_csp != X265_CSP_I400) + if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400) { cu.m_cbf[1][absPartIdx] |= ucbf << tuDepth; cu.m_cbf[2][absPartIdx] |= vcbf << tuDepth; @@ -3403,7 +3423,7 @@ const bool bSubdiv = tuDepth < cu.m_tuDepth[absPartIdx]; uint32_t log2TrSize = cu.m_log2CUSize[0] - tuDepth; - if (m_csp != X265_CSP_I400) + if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400) { if (!(log2TrSize - m_hChromaShift < 2)) { @@ -3442,7 +3462,7 @@ const uint32_t qtLayer = log2TrSize - 2; uint32_t log2TrSizeC = log2TrSize - m_hChromaShift; - uint32_t codeChroma = (m_csp != X265_CSP_I400) ? 1 : 0; + uint32_t codeChroma = (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400) ? 
1 : 0; uint32_t tuDepthC = tuDepth; if (log2TrSizeC < 2) { diff -r 6d3849d648f0 -r 028ccaa64860 source/encoder/slicetype.cpp --- a/source/encoder/slicetype.cpp Sun May 29 21:50:25 2016 +0800 +++ b/source/encoder/slicetype.cpp Wed May 25 10:34:18 2016 +0530 @@ -83,7 +83,7 @@ uint32_t var; var = acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[0] + blockOffsetLuma, stride, 0, csp); - if (csp != X265_CSP_I400) + if (csp != X265_CSP_I400 && curFrame->m_fencPic->m_picCsp != X265_CSP_I400) { var += acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[1] + blockOffsetChroma, cStride, 1, csp); var += acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[2] + blockOffsetChroma, cStride, 2, csp); diff -r 6d3849d648f0 -r 028ccaa64860 source/encoder/weightPrediction.cpp --- a/source/encoder/weightPrediction.cpp Sun May 29 21:50:25 2016 +0800 +++ b/source/encoder/weightPrediction.cpp Wed May 25 10:34:18 2016 +0530 @@ -233,7 +233,7 @@ cache.numPredDir = slice.isInterP() ? 1 : 2; cache.lowresWidthInCU = fenc.width >> 3; cache.lowresHeightInCU = fenc.lines >> 3; - cache.csp = fencPic->m_picCsp; + cache.csp = param.internalCsp; cache.hshift = CHROMA_H_SHIFT(cache.csp); cache.vshift = CHROMA_V_SHIFT(cache.csp); @@ -330,7 +330,7 @@ { /* reference chroma planes must be extended prior to being * used as motion compensation sources */ - if (!refFrame->m_bChromaExtended && param.internalCsp != X265_CSP_I400) + if (!refFrame->m_bChromaExtended && param.internalCsp != X265_CSP_I400 && frame.m_fencPic->m_picCsp != X265_CSP_I400) { refFrame->m_bChromaExtended = true; PicYuv *refPic = refFrame->m_fencPic; _______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel