Sent a new patch.
On Sat, Mar 8, 2014 at 12:50 AM, Steve Borho <[email protected]> wrote: > On Fri, Mar 7, 2014 at 5:45 AM, <[email protected]> wrote: > > # HG changeset patch > > # User Murugan Vairavel <[email protected]> > > # Date 1394192688 -19800 > > # Fri Mar 07 17:14:48 2014 +0530 > > # Node ID 5c626bf1e275596b45808c14952bd5aea8aaeb3e > > # Parent 2bf727dca27d6f69e96d4412850661cbe036cbef > > shortyuv: integrated asm primitives for blockcopy > > > > diff -r 2bf727dca27d -r 5c626bf1e275 source/common/shortyuv.cpp > > --- a/source/common/shortyuv.cpp Fri Mar 07 15:11:13 2014 +0530 > > +++ b/source/common/shortyuv.cpp Fri Mar 07 17:14:48 2014 +0530 > > @@ -129,32 +129,27 @@ > > > > void ShortYuv::copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t > partIdx, uint32_t width, uint32_t height) > > { > > + int part = partitionFromSizes(width, height); > > int16_t* src = getLumaAddr(partIdx); > > int16_t* dst = dstPicYuv->getLumaAddr(partIdx); > > > > uint32_t srcStride = m_width; > > uint32_t dstStride = dstPicYuv->m_width; > > -#if HIGH_BIT_DEPTH > > - primitives.blockcpy_pp(width, height, (pixel*)dst, dstStride, > (pixel*)src, srcStride); > > -#else > > - for (uint32_t y = height; y != 0; y--) > > - { > > - ::memcpy(dst, src, width * sizeof(int16_t)); > > - src += srcStride; > > - dst += dstStride; > > - } > > -#endif > > + > > + primitives.luma_copy_ss[part](dst, dstStride, src, srcStride); > > + > > } > > > > void ShortYuv::copyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx, > uint32_t width, uint32_t height) > > { > > + int part = partitionFromSizes(width, height); > > int16_t* src = getLumaAddr(partIdx); > > pixel* dst = dstPicYuv->getLumaAddr(partIdx); > > > > uint32_t srcStride = m_width; > > uint32_t dstStride = dstPicYuv->getStride(); > > > > - primitives.blockcpy_ps(width, height, dst, dstStride, src, > srcStride); > > + primitives.luma_copy_sp[part](dst, dstStride, src, srcStride); > > } > > > > void ShortYuv::copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t > partIdx, uint32_t width, uint32_t height) > > @@ -163,23 +158,15 @@ > > int16_t* srcV = getCrAddr(partIdx); > > int16_t* dstU = dstPicYuv->getCbAddr(partIdx); > > int16_t* dstV = dstPicYuv->getCrAddr(partIdx); > > + width = width << m_hChromaShift; > > + height = height << m_vChromaShift; > > + int part = partitionFromSizes(width, height); > > > > uint32_t srcStride = m_cwidth; > > uint32_t dstStride = dstPicYuv->m_cwidth; > > -#if HIGH_BIT_DEPTH > > - primitives.blockcpy_pp(width, height, (pixel*)dstU, dstStride, > (pixel*)srcU, srcStride); > > - primitives.blockcpy_pp(width, height, (pixel*)dstV, dstStride, > (pixel*)srcV, srcStride); > > -#else > > - for (uint32_t y = height; y != 0; y--) > > - { > > - ::memcpy(dstU, srcU, width * sizeof(int16_t)); > > - ::memcpy(dstV, srcV, width * sizeof(int16_t)); > > - srcU += srcStride; > > - srcV += srcStride; > > - dstU += dstStride; > > - dstV += dstStride; > > - } > > -#endif > > + > > + primitives.chroma[m_csp].copy_ss[part](dstU, dstStride, srcU, > srcStride); > > + primitives.chroma[m_csp].copy_ss[part](dstV, dstStride, srcV, > srcStride); > > > I'm pretty sure this is wrong. The chroma primitives are indexed by > color space and luma partition size, so the function pointer itself > scales the luma dimensions down to the correct chroma dimensions for > that color space. > > So essentially this function cannot use the chroma block copy > functions without changing the function arguments and callers to pass > in luma dimensions or the luma partition enum. > > > } > > > > void ShortYuv::copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t > partIdx, uint32_t width, uint32_t height) > > @@ -188,28 +175,29 @@ > > int16_t* srcV = getCrAddr(partIdx); > > pixel* dstU = dstPicYuv->getCbAddr(partIdx); > > pixel* dstV = dstPicYuv->getCrAddr(partIdx); > > + width = width << m_hChromaShift; > > + height = height << m_vChromaShift; > > + int part = partitionFromSizes(width, height); > > > > uint32_t srcStride = m_cwidth; > > uint32_t dstStride = dstPicYuv->getCStride(); > > > > - primitives.blockcpy_ps(width, height, dstU, dstStride, srcU, > srcStride); > > - primitives.blockcpy_ps(width, height, dstV, dstStride, srcV, > srcStride); > > + primitives.chroma[m_csp].copy_sp[part](dstU, dstStride, srcU, > srcStride); > > + primitives.chroma[m_csp].copy_sp[part](dstV, dstStride, srcV, > srcStride); > > ditto > > > } > > > > void ShortYuv::copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t > partIdx, uint32_t width, uint32_t height, uint32_t chromaId) > > { > > + width = width << m_hChromaShift; > > + height = height << m_vChromaShift; > > + int part = partitionFromSizes(width, height); > > if (chromaId == 0) > > { > > int16_t* srcU = getCbAddr(partIdx); > > int16_t* dstU = dstPicYuv->getCbAddr(partIdx); > > uint32_t srcStride = m_cwidth; > > uint32_t dstStride = dstPicYuv->m_cwidth; > > - for (uint32_t y = height; y != 0; y--) > > - { > > - ::memcpy(dstU, srcU, width * sizeof(int16_t)); > > - srcU += srcStride; > > - dstU += dstStride; > > - } > > + primitives.chroma[m_csp].copy_ss[part](dstU, dstStride, srcU, > srcStride); > > } > > else if (chromaId == 1) > > { > > @@ -217,12 +205,7 @@ > > int16_t* dstV = dstPicYuv->getCrAddr(partIdx); > > uint32_t srcStride = m_cwidth; > > uint32_t dstStride = dstPicYuv->m_cwidth; > > - for (uint32_t y = height; y != 0; y--) > > - { > > - ::memcpy(dstV, srcV, width * sizeof(int16_t)); > > - srcV += srcStride; > > - dstV += dstStride; > > - } > > + primitives.chroma[m_csp].copy_ss[part](dstV, dstStride, srcV, > srcStride); > > } > > else > > { > > @@ -232,27 +215,23 @@ > > int16_t* dstV = dstPicYuv->getCrAddr(partIdx); > > uint32_t srcStride = m_cwidth; > > uint32_t dstStride = dstPicYuv->m_cwidth; > > - for (uint32_t y = height; y != 0; y--) > > - { > > - ::memcpy(dstU, srcU, width * sizeof(int16_t)); > > - ::memcpy(dstV, srcV, width * sizeof(int16_t)); > > - srcU += srcStride; > > - srcV += srcStride; > > - dstU += dstStride; > > - dstV += dstStride; > > - } > > + primitives.chroma[m_csp].copy_ss[part](dstU, dstStride, srcU, > srcStride); > > + primitives.chroma[m_csp].copy_ss[part](dstV, dstStride, srcV, > srcStride); > > and... ditto > > > } > > } > > > > void ShortYuv::copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t > partIdx, uint32_t width, uint32_t height, uint32_t chromaId) > > { > > + width = width << m_hChromaShift; > > + height = height << m_vChromaShift; > > + int part = partitionFromSizes(width, height); > > if (chromaId == 0) > > { > > int16_t* srcU = getCbAddr(partIdx); > > pixel* dstU = dstPicYuv->getCbAddr(partIdx); > > uint32_t srcStride = m_cwidth; > > uint32_t dstStride = dstPicYuv->getCStride(); > > - primitives.blockcpy_ps(width, height, dstU, dstStride, srcU, > srcStride); > > + primitives.chroma[m_csp].copy_sp[part](dstU, dstStride, srcU, > srcStride); > > } > > else if (chromaId == 1) > > { > > @@ -260,7 +239,7 @@ > > pixel* dstV = dstPicYuv->getCrAddr(partIdx); > > uint32_t srcStride = m_cwidth; > > uint32_t dstStride = dstPicYuv->getCStride(); > > - primitives.blockcpy_ps(width, height, dstV, dstStride, srcV, > srcStride); > > + primitives.chroma[m_csp].copy_sp[part](dstV, dstStride, srcV, > srcStride); > > } > > else > > { > > @@ -271,7 +250,7 @@ > > > > uint32_t srcStride = m_cwidth; > > uint32_t dstStride = dstPicYuv->getCStride(); > > - primitives.blockcpy_ps(width, height, dstU, dstStride, srcU, > srcStride); > > - primitives.blockcpy_ps(width, height, dstV, dstStride, srcV, > srcStride); > > + primitives.chroma[m_csp].copy_sp[part](dstU, dstStride, srcU, > srcStride); > > + primitives.chroma[m_csp].copy_sp[part](dstV, dstStride, srcV, > srcStride); > > still busted. > > Have a look at how the TComYuv methods were modified to handle this. > > -- > Steve Borho > _______________________________________________ > x265-devel mailing list > [email protected] > https://mailman.videolan.org/listinfo/x265-devel > -- With Regards, Murugan. V +919659287478
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
