[Audio-dev] CR: Eliminate memory copying in mix engine for single player/stream case

Sheldon fu Wed, 29 Apr 2009 08:20:41 -0700

Synopsis:

This change gets rid of unnecessary memcpy and/or equivalent when mixing
is not really needed.



Overview:

Currently, even when there is only a single player/stream, we still run
PCM data through the DSP mix engine. The mix engine logic does two
memory copies even when the in/out formats match (same sample rate,
channels and bytesPerSample) -- one in ConvertIntoBuffer, when it asks
the stream object for the data, and one to mix the data into the output
buffer that the session object provides. These two memcpys are pure
overhead in the typical single player/stream playback case.

The change adds a buffer list parameter to the stream object's
MixIntoBuffer method, and adds a flag that controls the optimization to
both that method and, in turn, the mix engine's MixIntoBuffer method.
When both the session and the mix engine agree to do the 'bypass'
optimization, the stream object returns a list of buffers that, put
together, contain exactly one block of audio data. The stream object
constructs this list from its internal data buffers with
CHXBufferFragment to avoid creating any new memory buffers. The mix
engine's MixIntoBuffer still runs through its logic, but it does not
touch any actual data (no ConvertIntoBuffer call and no mixing into the
output buffer). The session object then sends the buffers in the
'direct list' to the audio device without change.

The session object agrees to do the 'bypass' when it knows there is only
a single player/stream and when the new HELIX_FEATURE_OPTIMIZED_MIXING
is defined.

The mix engine agrees to do the 'bypass' when it knows that there is
nothing it really needs to do -- the in/out formats are the same and
the relevant DSP features (HELIX_FEATURE_CROSSFADE,
HELIX_FEATURE_GAINTOOL, HELIX_FEATURE_LIMITER) are not defined. The
logic here can be improved in the future so that we can handle these
features dynamically -- e.g. when cross fade is on, we can still do the
bypass up to the point where cross fade kicks in. This is not coded yet,
since this work is mainly an optimization for Android and the Android
build doesn't have these DSP features defined currently.

This feature also conflicts with HELIX_CONFIG_MIN_ROLLBACK_BUFFER, which
the unix audio device uses. When HELIX_CONFIG_MIN_ROLLBACK_BUFFER is on,
the unix audio device dynamically adjusts its rollback buffer size to
the incoming buffer size every time _Write is called. That apparently
can only work when _Write is always called with a constant buffer size;
if not, it will break. Not sure why we have such logic there. Also, the
rollback handling in the unix audio device makes copies of incoming data
too.

Even with this change, the 'artificial' block-based handling of audio
data is still not efficient. Though there is no memcpy anymore, the
control logic is quite complicated. The OS audio API can handle a write
of any size, up to the maximum buffer limit, and for the simplest case
we should just write the decoded PCM frame data to the OS audio device
with a 1-to-1 mapping.

On Android, the change results in about a 1-2% CPU usage saving when
playing an MP3 clip; there is no noticeable change on my host Linux
box (the machine is too fast already, I think).

This CR also contains the change in the previous CR for mixengine
cvt16/cvt32 optimization.

Head and Atlas310.

fxd

? Makefile
? Umakefil.upp
? audiosvc.diff
? dbg
? diff.txt
? rel
? ribosome_logs
Index: hxaudses.cpp
===================================================================
RCS file: /cvsroot/client/audiosvc/hxaudses.cpp,v
retrieving revision 1.79.2.2
diff -u -w -r1.79.2.2 hxaudses.cpp
--- hxaudses.cpp	28 Jan 2009 06:12:54 -0000	1.79.2.2
+++ hxaudses.cpp	29 Apr 2009 13:30:58 -0000
@@ -265,6 +265,7 @@
     , m_bOpaqueMode(FALSE)
     , m_pOpaqueData(NULL)
     , m_pMPPSupport(NULL)
+    , m_pStreamBufferList(NULL)
     , m_ulTargetPushdown(TARGET_AUDIO_PUSHDOWN)
     , m_ulMinimumPushdown(MINIMUM_AUDIO_PUSHDOWN)
 #if defined(HELIX_FEATURE_TIMELINE_WATCHER)
@@ -431,6 +432,8 @@
     HX_RELEASE(m_pMutex);
     HX_RELEASE(m_pFinalHook);
     HX_RELEASE(m_pMPPSupport);
+    
+    HX_DELETE(m_pStreamBufferList);
 }
 /////////////////////////////////////////////////////////////////////////
 //  Method:
@@ -1231,6 +1234,7 @@
 
     HX_RESULT theErr        = HXR_OK;
     HXBOOL      bDisableWrite = FALSE;
+    UINT32    ulNumBytesWritten = 0;
 
     if ( !m_bInited )
         return theErr;
@@ -1256,6 +1260,7 @@
         if (HXR_OK == ProcessAudioDevice(ACTION_CHECK, m_pAudioDev))
         {
             /* Try to stuff in as much backlog as possible */
+            ulNumBytesWritten = 0;
             while (!theErr && m_pAuxiliaryAudioBuffers->GetCount() > 0)
             {
                 HXAudioData* pAudioData =
@@ -1272,8 +1277,16 @@
 
                 if (!theErr)
                 {
+                    //make sure we count by blocks
+                    //this also assume that audio device won't reject
+                    //data on non-block-boundary
+                    ulNumBytesWritten += pAudioData->pData->GetSize();
+                    if (ulNumBytesWritten == m_ulBytesPerGran)
+                    {
                     m_ulBlocksWritten++;
-                    m_dNumBytesWritten  += pAudioData->pData->GetSize();
+                      m_dNumBytesWritten  += ulNumBytesWritten;
+                      ulNumBytesWritten = 0;
+                    }
 
                     m_pAuxiliaryAudioBuffers->RemoveHead();
                     pAudioData->pData->Release();
@@ -1326,7 +1339,6 @@
             m_uNumToBePushed      = uPush - i;
             m_bSessionBufferDirty = FALSE; // only used for multi-player case
 
-            UINT32  ulNumBytesWritten       = m_ulBytesPerGran;
             HXBOOL  bAtLeastOnePlayerActive = FALSE;
 
             theErr = m_pSessionBuf->SetSize(m_ulBytesPerGran);
@@ -1341,6 +1353,28 @@
             // Zero session buffer.
             //memset(pSessionBuf, 0, HX_SAFESIZE_T(m_ulBytesPerGran));
 
+            //determine the special case of single player and single stream
+            HXBOOL bOptimizedMixing = FALSE;
+#ifdef HELIX_FEATURE_OPTIMIZED_MIXING            
+            if (m_pPlayerList->GetCount() == 1 
+                && ((CHXAudioPlayer*)m_pPlayerList->GetHead())->GetStreamCount() == 1)
+            {
+                bOptimizedMixing = TRUE;
+                //this will hold the non-memcpy-ed buffers returned from stream object
+                if (m_pStreamBufferList == NULL)
+                {
+                   m_pStreamBufferList = new CHXSimpleList;
+                }
+                
+                //best efford. instead of quit with OUTOFMEMORY, we just go back to
+                //normal mode if failed
+                if (m_pStreamBufferList == NULL)
+                {
+                   bOptimizedMixing = FALSE;
+                }
+            }
+#endif
+
             // Get each player
             pPlayer = 0;
             CHXSimpleList::Iterator lIter = m_pPlayerList->Begin();
@@ -1406,8 +1440,10 @@
                                 theErr = pStream->MixIntoBuffer( pMixBuffer,
                                                                  m_ulBytesPerGran,
                                                                  ulBufTime,
-                                                                 bIsMixBufferDirty);
-
+                                                                 bIsMixBufferDirty,
+                                                                 bOptimizedMixing,
+                                                                 m_pStreamBufferList 
+                                                                 );
                                 if (HXR_OK !=theErr)
                                 {
                                     HXLOGL4(HXLOG_ADEV, "CHXAudioSession[%p]::PlayAudio(): after mix: err = 0x%08x", this, theErr);
@@ -1572,6 +1608,29 @@
 
             if (m_pAudioDev && !m_bDisableWrite)
             {
+                // a bit ugly -- trying to use the same code here for optimized and non-optimized
+                // mixing cases
+                int count = 1; //non-optimized mixing case, only one buffer to write out 
+                if (bOptimizedMixing)
+                {
+                   count = m_pStreamBufferList->GetCount();
+                }
+                
+                ulNumBytesWritten = 0;
+                theErr = HXR_OK;
+                for (int i=0; i < count ; i++)
+                {
+                   if (bOptimizedMixing)
+                   {
+                       audioData.pData = (IHXBuffer*) m_pStreamBufferList->RemoveHead();
+                       
+                       //FXD: this floating math may be too heavy on certain platforms.
+                       //     currently only the win32 audio device cares about audioData.ulAudioTime value.
+                       //     we can disable this math operation for other platforms if we see a need.
+                       audioData.ulAudioTime = (ULONG32) m_dBufEndTime 
+                                              + m_dGranularity * (float)ulNumBytesWritten / m_ulBytesPerGran;
+                   }
+                    
                 /* are we dealing with a messed up sound card */
                 if ((m_BeforeHookDeviceFmt.uChannels == 1 && m_DeviceFmt.uChannels == 2)||
                     m_BeforeHookDeviceFmt.uBitsPerSample == 8)
@@ -1608,18 +1667,22 @@
                     ProcessHooks(&audioData);
                 }
 
-                ulNumBytesWritten = audioData.pData->GetSize();
+                   if (theErr == HXR_OK)
+                   {
+                      ulNumBytesWritten += audioData.pData->GetSize();
                 if (HXR_OK == ProcessAudioDevice(ACTION_CHECK, m_pAudioDev))
                 {
                     HXLOGL4(HXLOG_ADEV, "CHXAudioSession[%p]::PlayAudio(): writing [%p] %lu bytes", this, &audioData, ulNumBytesWritten);
 
                     // Write session audio data to device.
                     theErr = m_pAudioDev->Write(&audioData);
+
                     if( theErr == HXR_OUTOFMEMORY )
                     {
                         goto exit;
                     }
                 }
+                   }
 
                 if (theErr != HXR_OK)
                 {
@@ -1628,6 +1691,12 @@
 
                 if (theErr == HXR_WOULD_BLOCK)
                 {
+                       // we assume that the audio device will not reject partial-block write
+                       // i.e, it can only return HXR_WOULD_BLOCK on block boundary
+                       // for platform that the audio device doesn't follow this rule
+                       // HELIX_FEATURE_OPTIMIZED_MIXING shouldn't be defined
+                       HX_ASSERT(ulNumBytesWritten % m_ulBytesPerGran == 0);
+                       
                     HXAudioData* pAudioData     = new HXAudioData;
                     pAudioData->pData           = audioData.pData;
                     pAudioData->pData->AddRef();
@@ -1639,14 +1708,22 @@
                     {
                         m_pAuxiliaryAudioBuffers = new CHXSimpleList;
                     }
+                       
                     if( NULL == m_pAuxiliaryAudioBuffers->AddTail(pAudioData) )
                     {
                         theErr = HXR_OUTOFMEMORY;
                         goto exit;
                     }
 
-                    HX_RELEASE(m_pSessionBuf);
+                       HX_RELEASE(audioData.pData);
+                   }
+                   
+                   //make sure we've released the direct buffer from stream
+                   if (bOptimizedMixing)
+                   {
+                       HX_RELEASE(audioData.pData);
                 }
+                   
                 /* Any error from audio device other than memory error is
                  * returned as HXR_AUDIO_DRIVER
                  */
@@ -1655,13 +1732,17 @@
                 {
                     theErr = HXR_AUDIO_DRIVER;
                 }
-            }
+                } // count loop
+                
+                HX_ASSERT(m_pStreamBufferList == NULL || m_pStreamBufferList->GetCount() == 0);
 
             if (!theErr)
             {
                 m_ulBlocksWritten++;
+                    HX_ASSERT(ulNumBytesWritten == m_ulBytesPerGran);
                 m_dNumBytesWritten  += ulNumBytesWritten;
             }
+            }   
 
             // So this function is good in theory, but in practice we find in
             // heap-optimized mode it is not necessary, and it leads to
Index: hxaudstr_new.cpp
===================================================================
RCS file: /cvsroot/client/audiosvc/hxaudstr_new.cpp,v
retrieving revision 1.44.2.2
diff -u -w -r1.44.2.2 hxaudstr_new.cpp
--- hxaudstr_new.cpp	13 Dec 2007 18:23:52 -0000	1.44.2.2
+++ hxaudstr_new.cpp	29 Apr 2009 13:30:58 -0000
@@ -187,6 +187,7 @@
     ,m_bBeyondStartTime(FALSE)
     ,m_bHasStartTime(FALSE)
     ,m_piPendingAudioData(NULL)
+    ,m_pSilenceBuffer(NULL)
 {
     m_Owner = owner;
     if (m_Owner)
@@ -228,6 +229,7 @@
     HX_DELETE(m_DryNotificationMap);
     ResetStream();
     HX_RELEASE(m_piPendingAudioData);
+    HX_RELEASE(m_pSilenceBuffer);
 }
  
 /////////////////////////////////////////////////////////////////////////
@@ -1892,7 +1894,6 @@
     return m_bIsRewound;
 }
 
-
 /************************************************************************
  *  CHXAudioStream methods
  */
@@ -1907,6 +1908,8 @@
                                          ULONG32  ulBufSize,
                                          ULONG32& ulBufTime,
                                          HXBOOL&  bIsMixBufferDirty,
+                                         HXBOOL&  bOptimizedMixing,
+                                         CHXSimpleList* pStreamBufferList,
                                          HXBOOL   bGetCrossFadeData
                                          )
 {
@@ -1952,6 +1955,10 @@
     m_pMixEngine->GetMixRange(ulBufSize, llStartMix, llEndMix) ;
     UINT32 nSamplesNeeded = INT64_TO_UINT32(llEndMix - llStartMix) ; // always fits into UINT32
 
+    // Keep a copy since EnoughDataAvailable can change llStartMix and nSamplesNeeded
+    INT64 llStartMix0 = llStartMix;
+    UINT32 nSamplesNeeded0 = nSamplesNeeded;
+
     if (!EnoughDataAvailable(llStartMix, nSamplesNeeded))
     {
         HXLOGL3(HXLOG_ADEV,
@@ -2010,7 +2017,9 @@
     }
 
     // this call does all the mixing.
-    res = m_pMixEngine->MixIntoBuffer(pPlayerBuf, ulBufSize, bIsMixBufferDirty, m_bIsOpaqueStream) ;
+    res = m_pMixEngine->MixIntoBuffer(pPlayerBuf, ulBufSize, 
+                           bIsMixBufferDirty, bOptimizedMixing, m_bIsOpaqueStream);
+                           
     if( m_wLastError == HXR_OUTOFMEMORY )
     {
         return m_wLastError;
@@ -2021,6 +2030,13 @@
         return res ; 
     }
 
+    // if we are still in optimized mode, it means that mix engine doesn't fill the pPlayerBuf
+    // we'll create the pStreamBufferList directly
+    if (bOptimizedMixing)
+    {
+        CreateDirectOutput(pStreamBufferList, llStartMix0, nSamplesNeeded0);
+    }
+
 #if defined(HELIX_FEATURE_AUDIO_INACCURATESAMPLING)
     if( m_bRealAudioStream )
     {
@@ -2061,26 +2077,13 @@
     return HXR_OK;
 }
 
-
-// This is the callback function that m_pMixEngine->MixIntoBuffer() will call to
-// read new samples.
-HXBOOL CHXAudioStream::ConvertIntoBuffer(tAudioSample* buffer, UINT32 nSamples, INT64 llStartTimeInSamples)
+// helper method to remove old packets
+HX_RESULT CHXAudioStream::RemoveOldPackets(INT64 llStartTimeInSamples)
 {
-    HXAudioInfo* pInfo              = 0;
-    LISTPOSITION lp                 = 0;
-    INT32        nBytesPerSample    = m_AudioFmt.uBitsPerSample>>3 ;
-    HXBOOL       didMix             = FALSE ; // set to TRUE if we have packets in this time range
-    HXBOOL       bPacketsAfterRange = FALSE;
-
-    HXLOGL4(HXLOG_ADEV, "CHXAudioStream[%p]::ConvertIntoBuffer(): start samples = %lu; sample count = %lu", this, INT64_TO_UINT32(llStartTimeInSamples), nSamples);
- 
-    // there are two lists of packets here: timed audio and instantaneous audio.
-    // We only look into the list for timed buffers -- Instantaneoue audio is ignored
-    // (it never properly worked anyway, so support is discontinued).
-
     // remove old packets. Old packets are packets that have an end time that is
     // before our current mix time.
-    lp = m_pDataList->GetHeadPosition();
+    HXAudioInfo* pInfo = NULL;
+    LISTPOSITION lp = m_pDataList->GetHeadPosition();
     while( lp )
     {
         LISTPOSITION lastlp = lp;
@@ -2089,7 +2092,7 @@
         if (pInfo->llEndTimeInSamples < llStartTimeInSamples)
         {
             HXLOGL4(HXLOG_ADEV,
-                    "CHXAudioStream[%p]::ConvertIntoBuffer(): reaping packet: start = %lu; end = %lu",
+                    "CHXAudioStream[%p]::RemoveOldPackets: reaping packet: start = %lu; end = %lu",
                     this,
                     INT64_TO_UINT32(pInfo->llStartTimeInSamples),
                     INT64_TO_UINT32(pInfo->llEndTimeInSamples));
@@ -2107,6 +2110,167 @@
             break ;
         }
     }
+    return HXR_OK;
+}
+
+HX_RESULT CHXAudioStream::AddSilenceBuffer(CHXSimpleList* pStreamBufferList, UINT32 ulSizeInBytes)
+{
+    //on-demand create/enlarge the silence buffer. this may not be
+    //the best strategy
+    if (!m_pSilenceBuffer)
+    {
+      HX_ASSERT(m_pCommonClassFactory);
+      IUnknown* pUnk = NULL;
+      if (HXR_OK == m_pCommonClassFactory->CreateInstance(CLSID_IHXBuffer, (void**)&pUnk))
+      {
+          pUnk->QueryInterface(IID_IHXBuffer, (void**)&m_pSilenceBuffer);
+          HX_RELEASE(pUnk);
+      }
+      else
+      {
+          return HXR_OUTOFMEMORY;
+      }
+    }
+    
+    if (m_pSilenceBuffer->GetSize() < ulSizeInBytes)
+    {
+        m_pSilenceBuffer->SetSize(ulSizeInBytes);
+        
+        if (m_pSilenceBuffer->GetSize() != ulSizeInBytes)
+        {
+           //cannot resize because refcount > 1, create a new buffer
+           HX_RELEASE(m_pSilenceBuffer);
+           HX_ASSERT(m_pCommonClassFactory);
+           IUnknown* pUnk = NULL;
+           if (HXR_OK == m_pCommonClassFactory->CreateInstance(CLSID_IHXBuffer, (void**)&pUnk))
+           {
+               pUnk->QueryInterface(IID_IHXBuffer, (void**)&m_pSilenceBuffer);
+               HX_RELEASE(pUnk);
+           }
+           else
+           {
+               return HXR_OUTOFMEMORY;
+           }
+        }     
+        memset(m_pSilenceBuffer->GetBuffer(), 0, ulSizeInBytes);
+    }	            
+    
+    IHXBuffer* pSilenceBuffer = (IHXBuffer*) m_pSilenceBuffer;     
+    if (ulSizeInBytes < m_pSilenceBuffer->GetSize())
+    {
+        pSilenceBuffer = (IHXBuffer*) new CHXBufferFragment(m_pSilenceBuffer, 
+                                                            m_pSilenceBuffer->GetBuffer(),
+                                                            ulSizeInBytes);
+        if (pSilenceBuffer == NULL)
+        {
+           return HXR_OUTOFMEMORY;
+        }
+    }
+    pSilenceBuffer->AddRef();
+    pStreamBufferList->AddTail(pSilenceBuffer);
+
+    return HXR_OK;
+}             
+
+// This method creates a list of data buffers with the total requested size.
+// It avoids memcpy by using CHXBufferFragment
+HX_RESULT CHXAudioStream::CreateDirectOutput(CHXSimpleList* pStreamBufferList, INT64 llStartTimeInSamples, UINT32 nSamples)
+{
+    HXAudioInfo* pInfo              = 0;
+    LISTPOSITION lp                 = 0;
+    INT32        nBytesPerSample    = m_AudioFmt.uBitsPerSample>>3 ;
+    HXBOOL       didMix             = FALSE ; // set to TRUE if we have packets in this time range
+    HXBOOL       bPacketsAfterRange = FALSE;
+
+    HXLOGL4(HXLOG_ADEV, "CHXAudioStream[%p]::CreateDirectOutput(): start samples = %lu; sample count = %lu", this, INT64_TO_UINT32(llStartTimeInSamples), nSamples);
+ 
+    // there are two lists of packets here: timed audio and instantaneous audio.
+    // We only look into the list for timed buffers -- Instantaneoue audio is ignored
+    // (it never properly worked anyway, so support is discontinued).
+
+    // remove old packets. Old packets are packets that have an end time that is
+    // before our current mix time.
+    if (RemoveOldPackets(llStartTimeInSamples) != HXR_OK) return FALSE;
+    
+    // now go through the entire list of packets
+    // assume the packet timestamps are monotonously increase and non-overlapping
+    // we'll fill the gaps with silent packets
+    // the start and end points likely need 'partial' packets 
+
+    lp = m_pDataList->GetHeadPosition();
+    while( lp && nSamples > 0)
+    {
+        pInfo = (HXAudioInfo*) m_pDataList->GetNext(lp);
+        
+        if (pInfo->llStartTimeInSamples > llStartTimeInSamples)
+        {
+            //there is a gap, fill with a silence buffer
+            UINT32 gap = INT64_TO_UINT32(pInfo->llStartTimeInSamples - llStartTimeInSamples);
+            AddSilenceBuffer(pStreamBufferList, gap * nBytesPerSample);     
+            llStartTimeInSamples += gap;
+            nSamples -= gap;
+        }
+        
+        IHXBuffer* pBuffer = pInfo->pBuffer;
+
+        UINT32 ulStartSamples = 0;
+        if (pInfo->llStartTimeInSamples < llStartTimeInSamples)
+        {
+            // skip some data from the beginning
+            ulStartSamples = INT64_TO_UINT32(llStartTimeInSamples - pInfo->llStartTimeInSamples);
+        }
+
+        UINT32 ulSizeSamples = pInfo->pBuffer->GetSize()/nBytesPerSample - ulStartSamples;
+        if (ulSizeSamples > nSamples)
+        {
+           //enough data, this is the last packet
+           ulSizeSamples = nSamples;
+        }
+        
+        if (ulStartSamples > 0 || ulSizeSamples * nBytesPerSample != pInfo->pBuffer->GetSize())
+        {
+           //we need only part of the packet
+           pBuffer = (IHXBuffer*) new CHXBufferFragment(
+                            pInfo->pBuffer,
+                            pInfo->pBuffer->GetBuffer() + ulStartSamples * nBytesPerSample,
+                            ulSizeSamples * nBytesPerSample);
+        }
+    
+        pBuffer->AddRef();
+        pStreamBufferList->AddTail(pBuffer);
+        
+        llStartTimeInSamples += ulSizeSamples;
+        nSamples -= ulSizeSamples;
+    }
+
+    if (nSamples > 0)
+    {
+       //use used all the buffers, fill some silence data at the end
+       AddSilenceBuffer(pStreamBufferList, nSamples * nBytesPerSample);
+    }
+    
+    return HXR_OK;        
+}
+
+// This is the callback function that m_pMixEngine->MixIntoBuffer() will call to
+// read new samples.
+HXBOOL CHXAudioStream::ConvertIntoBuffer(tAudioSample* buffer, UINT32 nSamples, INT64 llStartTimeInSamples)
+{
+    HXAudioInfo* pInfo              = 0;
+    LISTPOSITION lp                 = 0;
+    INT32        nBytesPerSample    = m_AudioFmt.uBitsPerSample>>3 ;
+    HXBOOL       didMix             = FALSE ; // set to TRUE if we have packets in this time range
+    HXBOOL       bPacketsAfterRange = FALSE;
+
+    HXLOGL4(HXLOG_ADEV, "CHXAudioStream[%p]::ConvertIntoBuffer(): start samples = %lu; sample count = %lu", this, INT64_TO_UINT32(llStartTimeInSamples), nSamples);
+ 
+    // there are two lists of packets here: timed audio and instantaneous audio.
+    // We only look into the list for timed buffers -- Instantaneoue audio is ignored
+    // (it never properly worked anyway, so support is discontinued).
+
+    // remove old packets. Old packets are packets that have an end time that is
+    // before our current mix time.
+    if (RemoveOldPackets(llStartTimeInSamples) != HXR_OK) return FALSE;
 
     // now go through the entire list of packets, and look for overlap with the
     // convert buffer. Any packet with overlap will be at least partially converted
@@ -2191,8 +2355,6 @@
 
 
 
-
-
 /************************************************************************
  *  Method:
  *              CHXAudioStream::Bytes2Samples
@@ -2412,7 +2574,6 @@
     return bDataAvailable;
 }
 
-
 HX_RESULT    
 CHXAudioStream::StartCrossFade(CHXAudioStream*  pFromStream, 
                                UINT32           ulCrossFadeStartTime,
Index: mixengine.cpp
===================================================================
RCS file: /cvsroot/client/audiosvc/mixengine.cpp,v
retrieving revision 1.20.2.1
diff -u -w -r1.20.2.1 mixengine.cpp
--- mixengine.cpp	25 Jun 2008 23:21:29 -0000	1.20.2.1
+++ mixengine.cpp	29 Apr 2009 13:30:58 -0000
@@ -344,8 +344,23 @@
 }
 #endif
 
-HX_RESULT HXAudioSvcMixEngine::MixIntoBuffer(void* pPlayerbuf0, UINT32 ulBufSizeInBytes_4, HXBOOL &bIsMixBufferDirty, HXBOOL bOpaqueStream)
+HX_RESULT HXAudioSvcMixEngine::MixIntoBuffer(void* pPlayerbuf0, UINT32 ulBufSizeInBytes_4, 
+                        HXBOOL &bIsMixBufferDirty, HXBOOL& bOptimizedMixing, HXBOOL bOpaqueStream)
 {
+    if (bOptimizedMixing)
+    {
+       bOptimizedMixing = FALSE;
+       //we'll enable optimized mixing (caller can directly return the data in the range)
+       //when we see fit
+       if (m_pResampler == NULL && m_ulSampleRate_1_2 == m_ulSampleRate_3_4 && m_nChannels_1 == m_nChannels_4)
+       {
+          //XXFXD: TODO -- should dynamically detect conditions
+#if !defined(HELIX_FEATURE_CROSSFADE) && !defined(HELIX_FEATURE_GAINTOOL) && !defined(HELIX_FEATURE_LIMITER)
+           bOptimizedMixing = TRUE;
+#endif
+       }
+    }
+
     // our caller's sense of "dirty" is inverted
     bIsMixBufferDirty = !bIsMixBufferDirty ;
 
@@ -356,7 +371,7 @@
     // We only allocate the sample buffers when we really need them (the first
     // time MixIntoBuffer() is called)
 
-    if (!m_pBuffer_1)
+    if (!m_pBuffer_1 && !bOptimizedMixing)
     {
         // allocate both buffers
         m_pBuffer_1 = new tAudioSample[m_ulChunkSize_1] ;
@@ -406,7 +421,11 @@
         //
 
         // get input
-        HXBOOL bHaveInput = m_pCvt->ConvertIntoBuffer(m_pBuffer_1, nSamples_1, m_llTimestamp_1);
+        HXBOOL bHaveInput = TRUE;
+        if (!bOptimizedMixing)
+        {
+            bHaveInput = m_pCvt->ConvertIntoBuffer(m_pBuffer_1, nSamples_1, m_llTimestamp_1);
+        }
 
         // update the time stamp.
         m_llTimestamp_1 += nSamples_1 ;
@@ -557,10 +576,15 @@
             {
                 // if we did not have input earlier, but we now receive data, we need to clean out
                 // the parts that have not been touched so far.
+                if (!bOptimizedMixing)
+                {
                 memset(pPlayerbuf0,0,pPlayerbuf - (char*)pPlayerbuf0) ;
             }
+            }
 
             // and mix into output (mix) buffer
+            if (!bOptimizedMixing)
+            {
             switch (m_ulBytesPerSample)
             {
             case 2:
@@ -570,6 +594,7 @@
                 upmix(pResampOutput_3, (INT32*)pPlayerbuf, m_upmixMachine, ulSamples_3, bOpaqueStream ? TRUE : bIsMixBufferDirty) ;
                 break ;
             }
+            }
 
             // if we have input anywhere, the buffer is not "dirty" anymore.
             bHadInput = TRUE ;
@@ -580,9 +605,12 @@
             {
                 // if we did have input earlier, but do not now, we need to clean the output
                 // buffer (because it will not be marked "dirty" anymore).
+                if (!bOptimizedMixing)
+                {
                 memset(pPlayerbuf, 0, nSamples_4 * m_ulBytesPerSample) ;
             }
         }
+        }
 
         // save left-over samples
         m_nOutputSamplesLeft_3 -= ulSamples_3 ;
@@ -591,9 +619,14 @@
         // if there is no resampler, there should be no left-over samples
         if (!m_pResampler) HX_ASSERT(m_nOutputSamplesLeft_3 == 0) ;
 
+
         // if left-over samples
         if (m_nOutputSamplesLeft_3)
+        {
+            // there should be no left-over in optimized mixing
+            HX_ASSERT(!bOptimizedMixing);
             memcpy(m_pBuffer_3, m_pBuffer_3 + ulSamples_3, m_nOutputSamplesLeft_3 * sizeof(*m_pBuffer_3)) ;
+        }
 
         nSamplesOutput_4 -= nSamples_4 ;
         pPlayerbuf += nSamples_4 * m_ulBytesPerSample ;
Index: pub/hxaudses.h
===================================================================
RCS file: /cvsroot/client/audiosvc/pub/hxaudses.h,v
retrieving revision 1.31
diff -u -w -r1.31 hxaudses.h
--- pub/hxaudses.h	6 Jul 2007 21:57:40 -0000	1.31
+++ pub/hxaudses.h	29 Apr 2009 13:30:58 -0000
@@ -725,6 +725,10 @@
     ULONG32             m_ulMinimumPushdown;
     void                UpdateMinimumPushdown();
 
+    // Buffer list to hold the 'raw' pcm data buffers from stream
+    // in optimized mixing
+    CHXSimpleList*      m_pStreamBufferList;
+
 #if defined(HELIX_FEATURE_TIMELINE_WATCHER)
     //IHXTimelineManager support.
     CHXSimpleList*      m_pTimeLineWatchers;
Index: pub/hxaudstr.h
===================================================================
RCS file: /cvsroot/client/audiosvc/pub/hxaudstr.h,v
retrieving revision 1.21.2.1
diff -u -w -r1.21.2.1 hxaudstr.h
--- pub/hxaudstr.h	12 Oct 2007 17:10:20 -0000	1.21.2.1
+++ pub/hxaudstr.h	29 Apr 2009 13:30:59 -0000
@@ -344,6 +344,8 @@
 			    ULONG32	ulBufSize,
 			    ULONG32&    ulBufTime,
 			    HXBOOL&	bIsMixBufferDirty,
+                            HXBOOL&        bOptimizedMixing,
+                            CHXSimpleList* pStreamBufferList = NULL,
 			    HXBOOL	bGetCrossFadeData = FALSE
 			    );
     ULONG32		MixData(
@@ -590,6 +592,10 @@
     IHXVolume* m_pStreamVolume;
 #endif
 
+    HX_RESULT CreateDirectOutput(CHXSimpleList* pStreamBufferList, INT64 llStartTimeInSamples, UINT32 nSamples);
+    HX_RESULT RemoveOldPackets(INT64 llStartTimeInSamples);
+    IHXBuffer* m_pSilenceBuffer;    //hold silence data
+    HX_RESULT AddSilenceBuffer(CHXSimpleList* pStreamBufferList, UINT32 ulSizeInBytes);
 };
 
 #ifdef _MACINTOSH
Index: pub/mixengine.h
===================================================================
RCS file: /cvsroot/client/audiosvc/pub/mixengine.h,v
retrieving revision 1.17
diff -u -w -r1.17 mixengine.h
--- pub/mixengine.h	6 Jul 2007 21:57:40 -0000	1.17
+++ pub/mixengine.h	29 Apr 2009 13:30:59 -0000
@@ -59,6 +59,7 @@
 // on memory- and resource-constrained devices, use 16-bit processing
 #ifdef HELIX_FEATURE_16BIT_MIXENGINE
 typedef INT16 tAudioSample ;
+#define NBITS_PER_AUDIOSAMPLE 16
 
 #if defined(HELIX_FEATURE_GAINTOOL) || defined(HELIX_FEATURE_CROSSFADE)\
  || defined(HELIX_FEATURE_LIMITER)
@@ -67,9 +68,9 @@
 
 #else // all other platforms use 32-bit processing
 typedef INT32 tAudioSample ;
+#define NBITS_PER_AUDIOSAMPLE 32
 #endif
 
-#define NBITS_PER_AUDIOSAMPLE (sizeof(tAudioSample)<<3)
 
 // derive your class from this. This will be used as a callback to convert samples from
 // the renderer input queues into the HXAudioSvcMixEngine source buffer
@@ -93,14 +94,19 @@
     }
     static void cvt16(const void *in, tAudioSample* out, int nSamples)
     {
+#if NBITS_PER_AUDIOSAMPLE == 16
+        memcpy(out, in, nSamples * sizeof(tAudioSample));
+#else
         for (int i=0; i < nSamples; i++) out[i] = ((const INT16*)in)[i] << (NBITS_PER_AUDIOSAMPLE-16) ;
+#endif
     }
     static void cvt32(const void *in, tAudioSample* out, int nSamples)
     {
-        if (NBITS_PER_AUDIOSAMPLE == 32)
-            memcpy(out, in, nSamples * sizeof(*out));
-        else
+#if NBITS_PER_AUDIOSAMPLE == 32
+        memcpy(out, in, nSamples * sizeof(tAudioSample));
+#else
             for (int i=0; i < nSamples; i++) out[i] = (INT16)(((const INT32*)in)[i] >> (NBITS_PER_AUDIOSAMPLE-16)) ;
+#endif
     }
     static void silence(tAudioSample* out, int nSamples)
     {
@@ -159,6 +165,7 @@
         void* pPlayerBuf,
         UINT32 ulBufSizeInBytes,
         HXBOOL&    bIsMixBufferDirty,
+        HXBOOL&        bOptimizedMixing,
 	HXBOOL bOpaqueStream = FALSE
     ) ;

_______________________________________________
Audio-dev mailing list
Audio-dev@helixcommunity.org
http://lists.helixcommunity.org/mailman/listinfo/audio-dev

[Audio-dev] CR: Eliminate memory copying in mix engine for single player/stream case

Reply via email to