[FFmpeg-devel] [PATCH 1/3] lavd/avfoundation: Switch from run-time format detection to format query and organize frame buffers in a buffer queue.

Thilo Borgmann <thilo.borgmann@mail.de>
Tue Nov 24 19:31:20 CET 2015


From 852b7db7efd2de9409a578f2e19debd2cfaf2abd Mon Sep 17 00:00:00 2001
From: Thilo Borgmann <thilo.borgmann@mail.de>
Date: Mon, 23 Nov 2015 23:19:23 +0100
Subject: [PATCH 1/3] lavd/avfoundation: Switch from run-time format detection
 to format query and organize frame buffers in a buffer queue.

This fixes tickets #4089, #4437, #4463 and #4513.
---
 libavdevice/avfoundation.m | 385 +++++++++++++++++++--------------------------
 1 file changed, 166 insertions(+), 219 deletions(-)
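
For reviewers unfamiliar with CMBufferQueue: the patch replaces the
mutex/condition handover of a single retained frame per media type with
a CoreMedia buffer queue that retains enqueued buffers itself and hands
them out sorted by output PTS. A minimal stand-alone sketch of that
pattern (illustration only, names hypothetical, error handling omitted):

    #import <CoreMedia/CoreMedia.h>

    static CMBufferQueueRef queue;

    static void setup_queue(void)
    {
        /* let the queue sort sample buffers by their output PTS */
        CMBufferQueueCreate(kCFAllocatorDefault, 0,
            CMBufferQueueGetCallbacksForSampleBuffersSortedByOutputPTS(),
            &queue);
    }

    /* producer, called from the capture delegate: the queue retains the
     * buffer, so no manual CFRetain()/CFRelease() and no locking */
    static void enqueue_frame(CMSampleBufferRef frame)
    {
        CMBufferQueueEnqueue(queue, frame);
    }

    /* consumer, called from avf_read_packet(): returns NULL when empty;
     * the caller owns the reference and must CFRelease() it */
    static CMSampleBufferRef dequeue_frame(void)
    {
        return (CMSampleBufferRef)CMBufferQueueDequeueAndRetain(queue);
    }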

diff --git a/libavdevice/avfoundation.m b/libavdevice/avfoundation.m
index 763e675..37f6be0 100644
--- a/libavdevice/avfoundation.m
+++ b/libavdevice/avfoundation.m
@@ -26,6 +26,7 @@
  */
 
 #import <AVFoundation/AVFoundation.h>
+#import <CoreMedia/CoreMedia.h>
 #include <pthread.h>
 
 #include "libavutil/pixdesc.h"
@@ -82,14 +83,10 @@ typedef struct
 {
     AVClass*        class;
 
-    int             frames_captured;
-    int             audio_frames_captured;
     int64_t         first_pts;
     int64_t         first_audio_pts;
-    pthread_mutex_t frame_lock;
-    pthread_cond_t  frame_wait_cond;
     id              avf_delegate;
-    id              avf_audio_delegate;
+    dispatch_queue_t dispatch_queue;
 
     AVRational      framerate;
     int             width, height;
@@ -108,6 +105,8 @@ typedef struct
 
     int             num_video_devices;
 
+    AVCaptureDeviceFormat *audio_format;
+
     int             audio_channels;
     int             audio_bits_per_sample;
     int             audio_float;
@@ -124,20 +123,9 @@ typedef struct
     AVCaptureSession         *capture_session;
     AVCaptureVideoDataOutput *video_output;
     AVCaptureAudioDataOutput *audio_output;
-    CMSampleBufferRef         current_frame;
-    CMSampleBufferRef         current_audio_frame;
+    CMBufferQueueRef         frame_buffer;
 } AVFContext;
 
-static void lock_frames(AVFContext* ctx)
-{
-    pthread_mutex_lock(&ctx->frame_lock);
-}
-
-static void unlock_frames(AVFContext* ctx)
-{
-    pthread_mutex_unlock(&ctx->frame_lock);
-}
-
 /** FrameReciever class - delegate for AVCaptureSession
  */
 @interface AVFFrameReceiver : NSObject
@@ -167,65 +155,7 @@ static void unlock_frames(AVFContext* ctx)
   didOutputSampleBuffer:(CMSampleBufferRef)videoFrame
          fromConnection:(AVCaptureConnection *)connection
 {
-    lock_frames(_context);
-
-    if (_context->current_frame != nil) {
-        CFRelease(_context->current_frame);
-    }
-
-    _context->current_frame = (CMSampleBufferRef)CFRetain(videoFrame);
-
-    pthread_cond_signal(&_context->frame_wait_cond);
-
-    unlock_frames(_context);
-
-    ++_context->frames_captured;
-}
-
-@end
-
-/** AudioReciever class - delegate for AVCaptureSession
- */
-@interface AVFAudioReceiver : NSObject
-{
-    AVFContext* _context;
-}
-
-- (id)initWithContext:(AVFContext*)context;
-
-- (void)  captureOutput:(AVCaptureOutput *)captureOutput
-  didOutputSampleBuffer:(CMSampleBufferRef)audioFrame
-         fromConnection:(AVCaptureConnection *)connection;
-
-@end
-
-@implementation AVFAudioReceiver
-
-- (id)initWithContext:(AVFContext*)context
-{
-    if (self = [super init]) {
-        _context = context;
-    }
-    return self;
-}
-
-- (void)  captureOutput:(AVCaptureOutput *)captureOutput
-  didOutputSampleBuffer:(CMSampleBufferRef)audioFrame
-         fromConnection:(AVCaptureConnection *)connection
-{
-    lock_frames(_context);
-
-    if (_context->current_audio_frame != nil) {
-        CFRelease(_context->current_audio_frame);
-    }
-
-    _context->current_audio_frame = (CMSampleBufferRef)CFRetain(audioFrame);
-
-    pthread_cond_signal(&_context->frame_wait_cond);
-
-    unlock_frames(_context);
-
-    ++_context->audio_frames_captured;
+    CMBufferQueueEnqueue(_context->frame_buffer, videoFrame);
 }
 
 @end
@@ -238,22 +168,16 @@ static void destroy_context(AVFContext* ctx)
     [ctx->video_output    release];
     [ctx->audio_output    release];
     [ctx->avf_delegate    release];
-    [ctx->avf_audio_delegate release];
 
     ctx->capture_session = NULL;
     ctx->video_output    = NULL;
     ctx->audio_output    = NULL;
     ctx->avf_delegate    = NULL;
-    ctx->avf_audio_delegate = NULL;
 
     av_freep(&ctx->audio_buffer);
 
-    pthread_mutex_destroy(&ctx->frame_lock);
-    pthread_cond_destroy(&ctx->frame_wait_cond);
-
-    if (ctx->current_frame) {
-        CFRelease(ctx->current_frame);
-    }
+    CFRelease(ctx->frame_buffer);
+    dispatch_release(ctx->dispatch_queue);
 }
 
 static void parse_device_name(AVFormatContext *s)
@@ -273,7 +197,7 @@ static void parse_device_name(AVFormatContext *s)
 /**
  * Configure the video device.
  *
- * Configure the video device using a run-time approach to access properties
+ * Configure the video device using a format query to access properties
 * since formats, activeFormat are available since iOS >= 7.0 or OSX >= 10.7
 * and activeVideoMaxFrameDuration is available since iOS >= 7.0 and OSX >= 10.9.
  *
@@ -301,6 +225,8 @@ static int configure_video_device(AVFormatContext *s, AVCaptureDevice *video_dev
             (dimensions.width == ctx->width && dimensions.height == ctx->height)) {
 
             selected_format = format;
+            ctx->width      = dimensions.width;
+            ctx->height     = dimensions.height;
 
             for (range in [format valueForKey:@"videoSupportedFrameRateRanges"]) {
                 double max_framerate;
@@ -372,7 +298,6 @@ static int add_video_device(AVFormatContext *s, AVCaptureDevice *video_device)
     struct AVFPixelFormatSpec pxl_fmt_spec;
     NSNumber *pixel_format;
     NSDictionary *capture_dict;
-    dispatch_queue_t queue;
 
     if (ctx->video_device_index < ctx->num_video_devices) {
         capture_input = (AVCaptureInput*) [[[AVCaptureDeviceInput alloc] initWithDevice:video_device error:&error] autorelease];
@@ -474,12 +399,7 @@ static int add_video_device(AVFormatContext *s, AVCaptureDevice *video_device)
 
     [ctx->video_output setVideoSettings:capture_dict];
     [ctx->video_output setAlwaysDiscardsLateVideoFrames:YES];
-
-    ctx->avf_delegate = [[AVFFrameReceiver alloc] initWithContext:ctx];
-
-    queue = dispatch_queue_create("avf_queue", NULL);
-    [ctx->video_output setSampleBufferDelegate:ctx->avf_delegate queue:queue];
-    dispatch_release(queue);
+    [ctx->video_output setSampleBufferDelegate:ctx->avf_delegate queue:ctx->dispatch_queue];
 
     if ([ctx->capture_session canAddOutput:ctx->video_output]) {
         [ctx->capture_session addOutput:ctx->video_output];
@@ -496,7 +416,6 @@ static int add_audio_device(AVFormatContext *s, AVCaptureDevice *audio_device)
     AVFContext *ctx = (AVFContext*)s->priv_data;
     NSError *error  = nil;
     AVCaptureDeviceInput* audio_dev_input = [[[AVCaptureDeviceInput alloc] initWithDevice:audio_device error:&error] autorelease];
-    dispatch_queue_t queue;
 
     if (!audio_dev_input) {
         av_log(s, AV_LOG_ERROR, "Failed to create AV capture input device: %s\n",
@@ -519,11 +438,8 @@ static int add_audio_device(AVFormatContext *s, AVCaptureDevice *audio_device)
         return 1;
     }
 
-    ctx->avf_audio_delegate = [[AVFAudioReceiver alloc] initWithContext:ctx];
+    [ctx->audio_output setSampleBufferDelegate:ctx->avf_delegate queue:ctx->dispatch_queue];
 
-    queue = dispatch_queue_create("avf_audio_queue", NULL);
-    [ctx->audio_output setSampleBufferDelegate:ctx->avf_audio_delegate queue:queue];
-    dispatch_release(queue);
 
     if ([ctx->capture_session canAddOutput:ctx->audio_output]) {
         [ctx->capture_session addOutput:ctx->audio_output];
@@ -537,125 +453,112 @@ static int add_audio_device(AVFormatContext *s, AVCaptureDevice *audio_device)
 
 static int get_video_config(AVFormatContext *s)
 {
-    AVFContext *ctx = (AVFContext*)s->priv_data;
-    CVImageBufferRef image_buffer;
-    CGSize image_buffer_size;
+    AVFContext *ctx  = (AVFContext*)s->priv_data;
     AVStream* stream = avformat_new_stream(s, NULL);
 
     if (!stream) {
         return 1;
     }
 
-    // Take stream info from the first frame.
-    while (ctx->frames_captured < 1) {
-        CFRunLoopRunInMode(kCFRunLoopDefaultMode, 0.1, YES);
-    }
-
-    lock_frames(ctx);
-
-    ctx->video_stream_index = stream->index;
-
-    avpriv_set_pts_info(stream, 64, 1, avf_time_base);
-
-    image_buffer      = CMSampleBufferGetImageBuffer(ctx->current_frame);
-    image_buffer_size = CVImageBufferGetEncodedSize(image_buffer);
-
+    ctx->video_stream_index   = stream->index;
     stream->codec->codec_id   = AV_CODEC_ID_RAWVIDEO;
     stream->codec->codec_type = AVMEDIA_TYPE_VIDEO;
-    stream->codec->width      = (int)image_buffer_size.width;
-    stream->codec->height     = (int)image_buffer_size.height;
+    stream->codec->width      = ctx->width;
+    stream->codec->height     = ctx->height;
     stream->codec->pix_fmt    = ctx->pixel_format;
 
-    CFRelease(ctx->current_frame);
-    ctx->current_frame = nil;
-
-    unlock_frames(ctx);
+    avpriv_set_pts_info(stream, 64, 1, avf_time_base);
 
     return 0;
 }
 
+static enum AVCodecID get_audio_codec_id(AVCaptureDeviceFormat *audio_format)
+{
+    AudioStreamBasicDescription *audio_format_desc = (AudioStreamBasicDescription*)CMAudioFormatDescriptionGetStreamBasicDescription(audio_format.formatDescription);
+    int audio_linear          = audio_format_desc->mFormatID == kAudioFormatLinearPCM;
+    int audio_bits_per_sample = audio_format_desc->mBitsPerChannel;
+    int audio_float           = audio_format_desc->mFormatFlags & kAudioFormatFlagIsFloat;
+    int audio_be              = audio_format_desc->mFormatFlags & kAudioFormatFlagIsBigEndian;
+    int audio_signed_integer  = audio_format_desc->mFormatFlags & kAudioFormatFlagIsSignedInteger;
+    int audio_packed          = audio_format_desc->mFormatFlags & kAudioFormatFlagIsPacked;
+
+    enum AVCodecID ret = AV_CODEC_ID_NONE;
+
+    if (audio_linear &&
+        audio_float &&
+        audio_bits_per_sample == 32 &&
+        audio_packed) {
+        ret = audio_be ? AV_CODEC_ID_PCM_F32BE : AV_CODEC_ID_PCM_F32LE;
+    } else if (audio_linear &&
+        audio_signed_integer &&
+        audio_bits_per_sample == 16 &&
+        audio_packed) {
+        ret = audio_be ? AV_CODEC_ID_PCM_S16BE : AV_CODEC_ID_PCM_S16LE;
+    } else if (audio_linear &&
+        audio_signed_integer &&
+        audio_bits_per_sample == 24 &&
+        audio_packed) {
+        ret = audio_be ? AV_CODEC_ID_PCM_S24BE : AV_CODEC_ID_PCM_S24LE;
+    } else if (audio_linear &&
+        audio_signed_integer &&
+        audio_bits_per_sample == 32 &&
+        audio_packed) {
+        ret = audio_be ? AV_CODEC_ID_PCM_S32BE : AV_CODEC_ID_PCM_S32LE;
+    }
+
+    return ret;
+}
+
 static int get_audio_config(AVFormatContext *s)
 {
-    AVFContext *ctx = (AVFContext*)s->priv_data;
-    CMFormatDescriptionRef format_desc;
+    AVFContext *ctx  = (AVFContext*)s->priv_data;
     AVStream* stream = avformat_new_stream(s, NULL);
+    AudioStreamBasicDescription *audio_format_desc = (AudioStreamBasicDescription*)CMAudioFormatDescriptionGetStreamBasicDescription(ctx->audio_format.formatDescription);
 
     if (!stream) {
         return 1;
     }
 
-    // Take stream info from the first frame.
-    while (ctx->audio_frames_captured < 1) {
-        CFRunLoopRunInMode(kCFRunLoopDefaultMode, 0.1, YES);
-    }
-
-    lock_frames(ctx);
-
-    ctx->audio_stream_index = stream->index;
-
-    avpriv_set_pts_info(stream, 64, 1, avf_time_base);
-
-    format_desc = CMSampleBufferGetFormatDescription(ctx->current_audio_frame);
-    const AudioStreamBasicDescription *basic_desc = CMAudioFormatDescriptionGetStreamBasicDescription(format_desc);
-
-    if (!basic_desc) {
-        av_log(s, AV_LOG_ERROR, "audio format not available\n");
-        return 1;
-    }
-
+    ctx->audio_stream_index       = stream->index;
     stream->codec->codec_type     = AVMEDIA_TYPE_AUDIO;
-    stream->codec->sample_rate    = basic_desc->mSampleRate;
-    stream->codec->channels       = basic_desc->mChannelsPerFrame;
+    stream->codec->sample_rate    = audio_format_desc->mSampleRate;
+    stream->codec->channels       = audio_format_desc->mChannelsPerFrame;
     stream->codec->channel_layout = av_get_default_channel_layout(stream->codec->channels);
 
-    ctx->audio_channels        = basic_desc->mChannelsPerFrame;
-    ctx->audio_bits_per_sample = basic_desc->mBitsPerChannel;
-    ctx->audio_float           = basic_desc->mFormatFlags & kAudioFormatFlagIsFloat;
-    ctx->audio_be              = basic_desc->mFormatFlags & kAudioFormatFlagIsBigEndian;
-    ctx->audio_signed_integer  = basic_desc->mFormatFlags & kAudioFormatFlagIsSignedInteger;
-    ctx->audio_packed          = basic_desc->mFormatFlags & kAudioFormatFlagIsPacked;
-    ctx->audio_non_interleaved = basic_desc->mFormatFlags & kAudioFormatFlagIsNonInterleaved;
+    avpriv_set_pts_info(stream, 64, 1, avf_time_base);
+
+    ctx->audio_channels        = audio_format_desc->mChannelsPerFrame;
+    ctx->audio_bits_per_sample = audio_format_desc->mBitsPerChannel;
+    int audio_float            = audio_format_desc->mFormatFlags & kAudioFormatFlagIsFloat;
+    int audio_be               = audio_format_desc->mFormatFlags & kAudioFormatFlagIsBigEndian;
+    int audio_signed_integer   = audio_format_desc->mFormatFlags & kAudioFormatFlagIsSignedInteger;
+    int audio_packed           = audio_format_desc->mFormatFlags & kAudioFormatFlagIsPacked;
 
-    if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
-        ctx->audio_float &&
+    if (audio_format_desc->mFormatID == kAudioFormatLinearPCM &&
+        audio_float &&
         ctx->audio_bits_per_sample == 32 &&
-        ctx->audio_packed) {
-        stream->codec->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_F32BE : AV_CODEC_ID_PCM_F32LE;
-    } else if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
-        ctx->audio_signed_integer &&
+        audio_packed) {
+        stream->codec->codec_id = audio_be ? AV_CODEC_ID_PCM_F32BE : AV_CODEC_ID_PCM_F32LE;
+    } else if (audio_format_desc->mFormatID == kAudioFormatLinearPCM &&
+        audio_signed_integer &&
         ctx->audio_bits_per_sample == 16 &&
-        ctx->audio_packed) {
-        stream->codec->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_S16BE : AV_CODEC_ID_PCM_S16LE;
-    } else if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
-        ctx->audio_signed_integer &&
+        audio_packed) {
+        stream->codec->codec_id = audio_be ? AV_CODEC_ID_PCM_S16BE : AV_CODEC_ID_PCM_S16LE;
+    } else if (audio_format_desc->mFormatID == kAudioFormatLinearPCM &&
+        audio_signed_integer &&
         ctx->audio_bits_per_sample == 24 &&
-        ctx->audio_packed) {
-        stream->codec->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_S24BE : AV_CODEC_ID_PCM_S24LE;
-    } else if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
-        ctx->audio_signed_integer &&
+        audio_packed) {
+        stream->codec->codec_id = audio_be ? AV_CODEC_ID_PCM_S24BE : AV_CODEC_ID_PCM_S24LE;
+    } else if (audio_format_desc->mFormatID == kAudioFormatLinearPCM &&
+        audio_signed_integer &&
         ctx->audio_bits_per_sample == 32 &&
-        ctx->audio_packed) {
-        stream->codec->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_S32BE : AV_CODEC_ID_PCM_S32LE;
+        audio_packed) {
+        stream->codec->codec_id = audio_be ? AV_CODEC_ID_PCM_S32BE : AV_CODEC_ID_PCM_S32LE;
     } else {
         av_log(s, AV_LOG_ERROR, "audio format is not supported\n");
         return 1;
     }
 
-    if (ctx->audio_non_interleaved) {
-        CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
-        ctx->audio_buffer_size        = CMBlockBufferGetDataLength(block_buffer);
-        ctx->audio_buffer             = av_malloc(ctx->audio_buffer_size);
-        if (!ctx->audio_buffer) {
-            av_log(s, AV_LOG_ERROR, "error allocating audio buffer\n");
-            return 1;
-        }
-    }
-
-    CFRelease(ctx->current_audio_frame);
-    ctx->current_audio_frame = nil;
-
-    unlock_frames(ctx);
-
     return 0;
 }
 
@@ -673,9 +576,11 @@ static int avf_read_header(AVFormatContext *s)
 
     ctx->first_pts          = av_gettime();
     ctx->first_audio_pts    = av_gettime();
+    // Create frame buffer queue, capture delegate and dispatch queue
+    CMBufferQueueCreate(kCFAllocatorDefault, 0, CMBufferQueueGetCallbacksForSampleBuffersSortedByOutputPTS(), &ctx->frame_buffer);
+    ctx->avf_delegate   = [[AVFFrameReceiver alloc] initWithContext:ctx];
+    ctx->dispatch_queue = dispatch_queue_create("org.ffmpeg.dispatch_queue", NULL);
 
-    pthread_mutex_init(&ctx->frame_lock, NULL);
-    pthread_cond_init(&ctx->frame_wait_cond, NULL);
 
 #if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070
     CGGetActiveDisplayList(0, NULL, &num_screens);
@@ -895,70 +800,106 @@ fail:
 static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
 {
     AVFContext* ctx = (AVFContext*)s->priv_data;
+    CMItemCount count;
+    CMSampleTimingInfo timing_info;
+    int64_t *first_pts = NULL;
 
     do {
-        CVImageBufferRef image_buffer;
-        lock_frames(ctx);
+        CMSampleBufferRef current_frame = (CMSampleBufferRef)CMBufferQueueDequeueAndRetain(ctx->frame_buffer);
+
+        if (!current_frame) {
+            CFRunLoopRunInMode(kCFRunLoopDefaultMode, 0.001, YES);
+            continue;
+        }
 
-        image_buffer = CMSampleBufferGetImageBuffer(ctx->current_frame);
+        if (!CMSampleBufferDataIsReady(current_frame)) {
+            CFRelease(current_frame);
+            continue;
+        }
+
+        if (CMSampleBufferGetOutputSampleTimingInfoArray(current_frame, 1, &timing_info, &count) != noErr) {
+            CFRelease(current_frame);
+            continue;
+        } else if (timing_info.presentationTimeStamp.value == 0) {
+            CFRelease(current_frame);
+            continue;
+        }
 
-        if (ctx->current_frame != nil) {
+        CVImageBufferRef image_buffer = CMSampleBufferGetImageBuffer(current_frame);
+        CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(current_frame);
+
+        if (image_buffer) {
             void *data;
+
             if (av_new_packet(pkt, (int)CVPixelBufferGetDataSize(image_buffer)) < 0) {
+                CFRelease(current_frame);
                 return AVERROR(EIO);
             }
 
-            CMItemCount count;
-            CMSampleTimingInfo timing_info;
-
-            if (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_frame, 1, &timing_info, &count) == noErr) {
-                AVRational timebase_q = av_make_q(1, timing_info.presentationTimeStamp.timescale);
-                pkt->pts = pkt->dts = av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q, avf_time_base_q);
-            }
-
             pkt->stream_index  = ctx->video_stream_index;
             pkt->flags        |= AV_PKT_FLAG_KEY;
 
             CVPixelBufferLockBaseAddress(image_buffer, 0);
 
-            data = CVPixelBufferGetBaseAddress(image_buffer);
-            memcpy(pkt->data, data, pkt->size);
+            if (CVPixelBufferIsPlanar(image_buffer)) {
+                uint8_t *dst = pkt->data;
+                for (int i = 0; i < CVPixelBufferGetPlaneCount(image_buffer); i++) {
+                    data             = CVPixelBufferGetBaseAddressOfPlane(image_buffer, i);
+                    size_t data_size = CVPixelBufferGetBytesPerRowOfPlane(image_buffer, i) *
+                                       CVPixelBufferGetHeightOfPlane(image_buffer, i);
+                    memcpy(dst, data, data_size);
+                    dst += data_size;
+                }
+            } else {
+                data = CVPixelBufferGetBaseAddress(image_buffer);
+                memcpy(pkt->data, data, pkt->size);
+            }
 
             CVPixelBufferUnlockBaseAddress(image_buffer, 0);
-            CFRelease(ctx->current_frame);
-            ctx->current_frame = nil;
-        } else if (ctx->current_audio_frame != nil) {
-            CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
-            int block_buffer_size         = CMBlockBufferGetDataLength(block_buffer);
+
+            first_pts = &ctx->first_pts;
+        } else if (block_buffer) {
+            int block_buffer_size = CMBlockBufferGetDataLength(block_buffer);
 
             if (!block_buffer || !block_buffer_size) {
+                CFRelease(current_frame);
                 return AVERROR(EIO);
             }
 
-            if (ctx->audio_non_interleaved && block_buffer_size > ctx->audio_buffer_size) {
+            // evaluate kAudioFormatFlagIsNonInterleaved, which may have changed even though the capture session is locked
+            CMFormatDescriptionRef format_desc                   = CMSampleBufferGetFormatDescription(current_frame);
+            const AudioStreamBasicDescription *audio_format_desc = CMAudioFormatDescriptionGetStreamBasicDescription(format_desc);
+            int audio_non_interleaved                            = audio_format_desc->mFormatFlags & kAudioFormatFlagIsNonInterleaved;
+
+            if (audio_non_interleaved && !ctx->audio_buffer) {
+                ctx->audio_buffer      = av_malloc(block_buffer_size);
+                ctx->audio_buffer_size = block_buffer_size;
+                if (!ctx->audio_buffer) {
+                    av_log(ctx, AV_LOG_ERROR, "error allocating audio buffer\n");
+                    CFRelease(current_frame);
+                    return AVERROR(ENOMEM);
+                }
+            }
+
+            if (audio_non_interleaved && block_buffer_size > ctx->audio_buffer_size) {
+                CFRelease(current_frame);
                 return AVERROR_BUFFER_TOO_SMALL;
             }
 
             if (av_new_packet(pkt, block_buffer_size) < 0) {
+                CFRelease(current_frame);
                 return AVERROR(EIO);
             }
 
-            CMItemCount count;
-            CMSampleTimingInfo timing_info;
-
-            if (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_audio_frame, 1, &timing_info, &count) == noErr) {
-                AVRational timebase_q = av_make_q(1, timing_info.presentationTimeStamp.timescale);
-                pkt->pts = pkt->dts = av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q, avf_time_base_q);
-            }
-
             pkt->stream_index  = ctx->audio_stream_index;
             pkt->flags        |= AV_PKT_FLAG_KEY;
 
-            if (ctx->audio_non_interleaved) {
+            if (audio_non_interleaved) {
                 int sample, c, shift, num_samples;
 
                 OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, ctx->audio_buffer);
                 if (ret != kCMBlockBufferNoErr) {
+                    CFRelease(current_frame);
                     return AVERROR(EIO);
                 }
 
@@ -970,7 +911,7 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
                     int##bps##_t **src;                                                \
                     int##bps##_t *dest;                                                \
                     src = av_malloc(ctx->audio_channels * sizeof(int##bps##_t*));      \
-                    if (!src) return AVERROR(EIO);                                     \
+                    if (!src) {CFRelease(current_frame); return AVERROR(EIO);}         \
                     for (c = 0; c < ctx->audio_channels; c++) {                        \
                         src[c] = ((int##bps##_t*)ctx->audio_buffer) + c * num_samples; \
                     }                                                                  \
@@ -990,18 +931,24 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
             } else {
                 OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
                 if (ret != kCMBlockBufferNoErr) {
+                    CFRelease(current_frame);
                     return AVERROR(EIO);
                 }
             }
 
-            CFRelease(ctx->current_audio_frame);
-            ctx->current_audio_frame = nil;
-        } else {
-            pkt->data = NULL;
-            pthread_cond_wait(&ctx->frame_wait_cond, &ctx->frame_lock);
+            first_pts = &ctx->first_audio_pts;
+        }
+
+        if (first_pts) {
+            if (!*first_pts) {
+                *first_pts = timing_info.presentationTimeStamp.value;
+            }
+            // TODO: this produces non-monotonic DTS if bits_per_sample == 16
+            AVRational timebase_q = av_make_q(1, timing_info.presentationTimeStamp.timescale);
+            pkt->pts = pkt->dts = av_rescale_q(timing_info.presentationTimeStamp.value - *first_pts, timebase_q, avf_time_base_q);
         }
 
-        unlock_frames(ctx);
+        CFRelease(current_frame);
     } while (!pkt->data);
 
     return 0;
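
A note on the timestamp handling above: packets are rebased against the
first observed presentation time stamp and rescaled from the CMTime
timescale into the device's 1/1000000 time base. A hedged numeric
illustration (values invented, not part of the patch):

    AVRational src_tb = av_make_q(1, 1000000000); /* CMTime timescale, ns */
    int64_t delta     = 33333333;                 /* pts - *first_pts     */
    int64_t pts       = av_rescale_q(delta, src_tb, avf_time_base_q);
    /* pts == 33333, i.e. one 30 fps frame interval in microseconds */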
-- 
2.3.2 (Apple Git-55)
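
The fix can be exercised with the usual avfoundation invocations, e.g.
(device indices are machine-dependent, 0:0 is just an example):

    ffmpeg -f avfoundation -list_devices true -i ""
    ffmpeg -f avfoundation -framerate 30 -video_size 640x480 -i 0:0 out.mkv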


