From: Mark Reid <mindm...@gmail.com> Hi, This patch fixes audio issues I've had with some capture devices. The audio gets really choppy and stops working. This seems to be because avf_read_packet stops outputting the audio frames because a video frame happens to be available first.
It is based on the approach used in a patch from #4437 https://trac.ffmpeg.org/ticket/4437 My approach uses an AVFifoBuffer instead of NSMutableArray and also outputs the packets in the same order they arrive from AVFoundation. Should fix tickets #4437 and #4513 --- libavdevice/avfoundation.m | 160 ++++++++++++++++++++++++++++--------- 1 file changed, 124 insertions(+), 36 deletions(-) diff --git a/libavdevice/avfoundation.m b/libavdevice/avfoundation.m index 59d5b0af4f..5ac6ec4183 100644 --- a/libavdevice/avfoundation.m +++ b/libavdevice/avfoundation.m @@ -31,13 +31,17 @@ #include "libavutil/pixdesc.h" #include "libavutil/opt.h" #include "libavutil/avstring.h" +#include "libavutil/avassert.h" #include "libavformat/internal.h" #include "libavutil/internal.h" #include "libavutil/parseutils.h" #include "libavutil/time.h" #include "libavutil/imgutils.h" +#include "libavutil/fifo.h" #include "avdevice.h" +#define FIFO_SIZE 4 + static const int avf_time_base = 1000000; static const AVRational avf_time_base_q = { @@ -128,8 +132,8 @@ typedef struct AVCaptureSession *capture_session; AVCaptureVideoDataOutput *video_output; AVCaptureAudioDataOutput *audio_output; - CMSampleBufferRef current_frame; - CMSampleBufferRef current_audio_frame; + AVFifoBuffer *video_fifo; + AVFifoBuffer *audio_fifo; AVCaptureDevice *observed_device; #if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070 @@ -138,6 +142,11 @@ typedef struct int observed_quit; } AVFContext; +typedef struct { + int64_t ts; + CMSampleBufferRef frame; +} BufferRef; + static void lock_frames(AVFContext* ctx) { pthread_mutex_lock(&ctx->frame_lock); @@ -148,6 +157,48 @@ static void unlock_frames(AVFContext* ctx) pthread_mutex_unlock(&ctx->frame_lock); } +static inline void fifo_write(AVFifoBuffer* f, int64_t ts, CMSampleBufferRef frame) +{ + BufferRef buf = { + .ts = ts, + .frame = frame, + }; + + CFRetain(frame); + av_fifo_generic_write(f, &buf, sizeof(BufferRef), NULL); +} + +static inline void 
fifo_peek(AVFifoBuffer* f, BufferRef *buf) +{ + if (av_fifo_size(f)) { + av_fifo_generic_peek(f, buf, sizeof(BufferRef), NULL); + return; + } + buf->frame = nil; + return; +} + +static inline void fifo_drain(AVFifoBuffer* f, int release) +{ + av_assert2(av_fifo_size(f) >= sizeof(BufferRef)); + if (release) { + BufferRef buf; + fifo_peek(f, &buf); + CFRelease(buf.frame); + } + av_fifo_drain(f, sizeof(BufferRef)); +} + +static inline void fifo_freep(AVFifoBuffer **f) +{ + if (f) { + while (av_fifo_size(*f)) { + fifo_drain(*f, 1); + } + av_fifo_freep(f); + } +} + /** FrameReciever class - delegate for AVCaptureSession */ @interface AVFFrameReceiver : NSObject @@ -225,13 +276,16 @@ static void unlock_frames(AVFContext* ctx) didOutputSampleBuffer:(CMSampleBufferRef)videoFrame fromConnection:(AVCaptureConnection *)connection { + AVFifoBuffer *fifo = _context->video_fifo; + int64_t ts = av_gettime_relative(); lock_frames(_context); - if (_context->current_frame != nil) { - CFRelease(_context->current_frame); + if (av_fifo_space(fifo) == 0) { + av_log(_context, AV_LOG_DEBUG, "video fifo is full, the oldest frame has been dropped\n"); + fifo_drain(fifo, 1); } - _context->current_frame = (CMSampleBufferRef)CFRetain(videoFrame); + fifo_write(fifo, ts, videoFrame); unlock_frames(_context); @@ -269,13 +323,16 @@ static void unlock_frames(AVFContext* ctx) didOutputSampleBuffer:(CMSampleBufferRef)audioFrame fromConnection:(AVCaptureConnection *)connection { + AVFifoBuffer *fifo = _context->audio_fifo; + int64_t ts = av_gettime_relative(); lock_frames(_context); - if (_context->current_audio_frame != nil) { - CFRelease(_context->current_audio_frame); + if (!av_fifo_space(fifo)) { + av_log(_context, AV_LOG_DEBUG, "audio fifo is full, the oldest frame has been dropped\n"); + fifo_drain(fifo, 1); } - _context->current_audio_frame = (CMSampleBufferRef)CFRetain(audioFrame); + fifo_write(fifo, ts, audioFrame); unlock_frames(_context); @@ -301,12 +358,10 @@ static void 
destroy_context(AVFContext* ctx) ctx->avf_audio_delegate = NULL; av_freep(&ctx->audio_buffer); + fifo_freep(&ctx->video_fifo); + fifo_freep(&ctx->audio_fifo); pthread_mutex_destroy(&ctx->frame_lock); - - if (ctx->current_frame) { - CFRelease(ctx->current_frame); - } } static void parse_device_name(AVFormatContext *s) @@ -624,6 +679,7 @@ static int add_audio_device(AVFormatContext *s, AVCaptureDevice *audio_device) static int get_video_config(AVFormatContext *s) { AVFContext *ctx = (AVFContext*)s->priv_data; + BufferRef buf; CVImageBufferRef image_buffer; CMBlockBufferRef block_buffer; CGSize image_buffer_size; @@ -644,8 +700,13 @@ static int get_video_config(AVFormatContext *s) avpriv_set_pts_info(stream, 64, 1, avf_time_base); - image_buffer = CMSampleBufferGetImageBuffer(ctx->current_frame); - block_buffer = CMSampleBufferGetDataBuffer(ctx->current_frame); + fifo_peek(ctx->video_fifo, &buf); + if (buf.frame == nil) { + return 1; + } + + image_buffer = CMSampleBufferGetImageBuffer(buf.frame); + block_buffer = CMSampleBufferGetDataBuffer(buf.frame); if (image_buffer) { image_buffer_size = CVImageBufferGetEncodedSize(image_buffer); @@ -661,9 +722,6 @@ static int get_video_config(AVFormatContext *s) stream->codecpar->format = ctx->pixel_format; } - CFRelease(ctx->current_frame); - ctx->current_frame = nil; - unlock_frames(ctx); return 0; @@ -672,6 +730,7 @@ static int get_video_config(AVFormatContext *s) static int get_audio_config(AVFormatContext *s) { AVFContext *ctx = (AVFContext*)s->priv_data; + BufferRef buf; CMFormatDescriptionRef format_desc; AVStream* stream = avformat_new_stream(s, NULL); @@ -690,7 +749,12 @@ static int get_audio_config(AVFormatContext *s) avpriv_set_pts_info(stream, 64, 1, avf_time_base); - format_desc = CMSampleBufferGetFormatDescription(ctx->current_audio_frame); + fifo_peek(ctx->audio_fifo, &buf); + if (buf.frame == nil) { + return 1; + } + + format_desc = CMSampleBufferGetFormatDescription(buf.frame); const AudioStreamBasicDescription 
*basic_desc = CMAudioFormatDescriptionGetStreamBasicDescription(format_desc); if (!basic_desc) { @@ -737,7 +801,7 @@ static int get_audio_config(AVFormatContext *s) } if (ctx->audio_non_interleaved) { - CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame); + CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(buf.frame); ctx->audio_buffer_size = CMBlockBufferGetDataLength(block_buffer); ctx->audio_buffer = av_malloc(ctx->audio_buffer_size); if (!ctx->audio_buffer) { @@ -746,9 +810,6 @@ static int get_audio_config(AVFormatContext *s) } } - CFRelease(ctx->current_audio_frame); - ctx->current_audio_frame = nil; - unlock_frames(ctx); return 0; @@ -771,6 +832,9 @@ static int avf_read_header(AVFormatContext *s) pthread_mutex_init(&ctx->frame_lock, NULL); + ctx->video_fifo = av_fifo_alloc_array(FIFO_SIZE, sizeof(BufferRef)); + ctx->audio_fifo = av_fifo_alloc_array(FIFO_SIZE, sizeof(BufferRef)); + #if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070 CGGetActiveDisplayList(0, NULL, &num_screens); #endif @@ -1051,33 +1115,52 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt) AVFContext* ctx = (AVFContext*)s->priv_data; do { + BufferRef video; + BufferRef audio; CVImageBufferRef image_buffer; CMBlockBufferRef block_buffer; lock_frames(ctx); - if (ctx->current_frame != nil) { + fifo_peek(ctx->video_fifo, &video); + fifo_peek(ctx->audio_fifo, &audio); + + if (video.frame != nil && audio.frame != nil) { + // process oldest CMSampleBufferRef first + if (audio.ts <= video.ts) { + video.frame = nil; + } else { + audio.frame = nil; + } + } + + if (video.frame != nil) { int status; int length = 0; - image_buffer = CMSampleBufferGetImageBuffer(ctx->current_frame); - block_buffer = CMSampleBufferGetDataBuffer(ctx->current_frame); + fifo_drain(ctx->video_fifo, 0); + unlock_frames(ctx); + + image_buffer = CMSampleBufferGetImageBuffer(video.frame); + block_buffer = CMSampleBufferGetDataBuffer(video.frame); if (image_buffer 
!= nil) { length = (int)CVPixelBufferGetDataSize(image_buffer); } else if (block_buffer != nil) { length = (int)CMBlockBufferGetDataLength(block_buffer); } else { + CFRelease(video.frame); return AVERROR(EINVAL); } if (av_new_packet(pkt, length) < 0) { + CFRelease(video.frame); return AVERROR(EIO); } CMItemCount count; CMSampleTimingInfo timing_info; - if (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_frame, 1, &timing_info, &count) == noErr) { + if (CMSampleBufferGetOutputSampleTimingInfoArray(video.frame, 1, &timing_info, &count) == noErr) { AVRational timebase_q = av_make_q(1, timing_info.presentationTimeStamp.timescale); pkt->pts = pkt->dts = av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q, avf_time_base_q); } @@ -1094,31 +1177,37 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt) status = AVERROR(EIO); } } - CFRelease(ctx->current_frame); - ctx->current_frame = nil; + CFRelease(video.frame); - if (status < 0) + if (status < 0) { return status; - } else if (ctx->current_audio_frame != nil) { - CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame); + } + } else if (audio.frame != nil) { + CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(audio.frame); int block_buffer_size = CMBlockBufferGetDataLength(block_buffer); + fifo_drain(ctx->audio_fifo, 0); + unlock_frames(ctx); + if (!block_buffer || !block_buffer_size) { + CFRelease(audio.frame); return AVERROR(EIO); } if (ctx->audio_non_interleaved && block_buffer_size > ctx->audio_buffer_size) { + CFRelease(audio.frame); return AVERROR_BUFFER_TOO_SMALL; } if (av_new_packet(pkt, block_buffer_size) < 0) { + CFRelease(audio.frame); return AVERROR(EIO); } CMItemCount count; CMSampleTimingInfo timing_info; - if (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_audio_frame, 1, &timing_info, &count) == noErr) { + if (CMSampleBufferGetOutputSampleTimingInfoArray(audio.frame, 1, &timing_info, &count) == noErr) { AVRational 
timebase_q = av_make_q(1, timing_info.presentationTimeStamp.timescale); pkt->pts = pkt->dts = av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q, avf_time_base_q); } @@ -1131,6 +1220,7 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt) OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, ctx->audio_buffer); if (ret != kCMBlockBufferNoErr) { + CFRelease(audio.frame); return AVERROR(EIO); } @@ -1162,12 +1252,12 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt) } else { OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data); if (ret != kCMBlockBufferNoErr) { + CFRelease(audio.frame); return AVERROR(EIO); } } - CFRelease(ctx->current_audio_frame); - ctx->current_audio_frame = nil; + CFRelease(audio.frame); } else { pkt->data = NULL; unlock_frames(ctx); @@ -1177,8 +1267,6 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt) return AVERROR(EAGAIN); } } - - unlock_frames(ctx); } while (!pkt->data); return 0; -- 2.29.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".