[FFmpeg-devel] [PATCH 1/2] vda: decode frame synchronously.

Tue Aug 7 21:46:46 CEST 2012

From: Sebastien Zwickert <dilaroga at free.fr>

The current implementation of VDA uses the default mode, in which the hardware decoder
runs asynchronously. That is why the VDA HWAccel must manage its own queue and therefore
depends on pthreads. The client needs to wait until the queue is not empty before it can
start processing a decoded frame. In VLC, this asynchronous mode causes a short flicker
at the beginning of every movie and whenever we seek within a movie.

These changes use the hardware decoder synchronously, so that the VDA module is now able to
return the decoded frame in the corresponding frame's data. Thus, the queue and its
glue code become unnecessary, as does the pthreads dependency.

However, this patch removes a public struct and some public functions related to the
management of the queue, so I bumped the minor version number.

---
 configure                 |    4 +-
 libavcodec/vda.c          |  154 +++++----------------------------------------
 libavcodec/vda.h          |   50 +--------------
 libavcodec/vda_h264.c     |   12 ++--
 libavcodec/vda_internal.h |    5 +-
 libavcodec/version.h      |    2 +-
 libavutil/pixfmt.h        |    2 +-
 7 files changed, 29 insertions(+), 200 deletions(-)

diff --git a/configure b/configure
index d7f180f..56f2089 100755
--- a/configure
+++ b/configure
@@ -1532,7 +1532,7 @@ h264_decoder_select="golomb h264chroma h264dsp h264pred h264qpel"
 h264_dxva2_hwaccel_deps="dxva2api_h"
 h264_dxva2_hwaccel_select="dxva2 h264_decoder"
 h264_vaapi_hwaccel_select="vaapi h264_decoder"
-h264_vda_hwaccel_deps="VideoDecodeAcceleration_VDADecoder_h pthreads"
+h264_vda_hwaccel_deps="VideoDecodeAcceleration_VDADecoder_h"
 h264_vda_hwaccel_select="vda h264_decoder"
 h264_vdpau_decoder_select="vdpau h264_decoder"
 iac_decoder_select="fft mdct sinewin"
@@ -1647,7 +1647,7 @@ zmbv_encoder_select="zlib"
 
 crystalhd_deps="libcrystalhd_libcrystalhd_if_h"
 vaapi_deps="va_va_h"
-vda_deps="VideoDecodeAcceleration_VDADecoder_h pthreads"
+vda_deps="VideoDecodeAcceleration_VDADecoder_h"
 vdpau_deps="vdpau_vdpau_h vdpau_vdpau_x11_h"
 
 # parsers
diff --git a/libavcodec/vda.c b/libavcodec/vda.c
index d3ced6c..627a0a3 100644
--- a/libavcodec/vda.c
+++ b/libavcodec/vda.c
@@ -20,65 +20,13 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include <pthread.h>
 #include <CoreFoundation/CFDictionary.h>
 #include <CoreFoundation/CFNumber.h>
 #include <CoreFoundation/CFData.h>
-#include <CoreFoundation/CFString.h>
 
 #include "libavutil/avutil.h"
 #include "vda_internal.h"
 
-/* Helper to create a dictionary according to the given pts. */
-static CFDictionaryRef vda_dictionary_with_pts(int64_t i_pts)
-{
-    CFStringRef key = CFSTR("FF_VDA_DECODER_PTS_KEY");
-    CFNumberRef value = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt64Type, &i_pts);
-    CFDictionaryRef user_info = CFDictionaryCreate(kCFAllocatorDefault,
-                                                   (const void **)&key,
-                                                   (const void **)&value,
-                                                   1,
-                                                   &kCFTypeDictionaryKeyCallBacks,
-                                                   &kCFTypeDictionaryValueCallBacks);
-    CFRelease(value);
-    return user_info;
-}
-
-/* Helper to retrieve the pts from the given dictionary. */
-static int64_t vda_pts_from_dictionary(CFDictionaryRef user_info)
-{
-    CFNumberRef pts;
-    int64_t outValue = 0;
-
-    if (!user_info)
-        return 0;
-
-    pts = CFDictionaryGetValue(user_info, CFSTR("FF_VDA_DECODER_PTS_KEY"));
-
-    if (pts)
-        CFNumberGetValue(pts, kCFNumberSInt64Type, &outValue);
-
-    return outValue;
-}
-
-/* Removes and releases all frames from the queue. */
-static void vda_clear_queue(struct vda_context *vda_ctx)
-{
-    vda_frame *top_frame;
-
-    pthread_mutex_lock(&vda_ctx->queue_mutex);
-
-    while (vda_ctx->queue) {
-        top_frame = vda_ctx->queue;
-        vda_ctx->queue = top_frame->next_frame;
-        ff_vda_release_vda_frame(top_frame);
-    }
-
-    pthread_mutex_unlock(&vda_ctx->queue_mutex);
-}
-
-
-/* Decoder callback that adds the vda frame to the queue in display order. */
 static void vda_decoder_callback (void *vda_hw_ctx,
                                   CFDictionaryRef user_info,
                                   OSStatus status,
@@ -86,8 +34,6 @@ static void vda_decoder_callback (void *vda_hw_ctx,
                                   CVImageBufferRef image_buffer)
 {
     struct vda_context *vda_ctx = (struct vda_context*)vda_hw_ctx;
-    vda_frame *new_frame;
-    vda_frame *queue_walker;
 
     if (!image_buffer)
         return;
@@ -95,46 +41,14 @@ static void vda_decoder_callback (void *vda_hw_ctx,
     if (vda_ctx->cv_pix_fmt_type != CVPixelBufferGetPixelFormatType(image_buffer))
         return;
 
-    new_frame = av_mallocz(sizeof(vda_frame));
-    if (!new_frame)
-        return;
-
-    new_frame->next_frame = NULL;
-    new_frame->cv_buffer = CVPixelBufferRetain(image_buffer);
-    new_frame->pts = vda_pts_from_dictionary(user_info);
-
-    pthread_mutex_lock(&vda_ctx->queue_mutex);
-
-    queue_walker = vda_ctx->queue;
-
-    if (!queue_walker || (new_frame->pts < queue_walker->pts)) {
-        /* we have an empty queue, or this frame earlier than the current queue head */
-        new_frame->next_frame = queue_walker;
-        vda_ctx->queue = new_frame;
-    } else {
-        /* walk the queue and insert this frame where it belongs in display order */
-        vda_frame *next_frame;
-
-        while (1) {
-            next_frame = queue_walker->next_frame;
-
-            if (!next_frame || (new_frame->pts < next_frame->pts)) {
-                new_frame->next_frame = next_frame;
-                queue_walker->next_frame = new_frame;
-                break;
-            }
-            queue_walker = next_frame;
-        }
-    }
-
-    pthread_mutex_unlock(&vda_ctx->queue_mutex);
+    vda_ctx->cv_buffer = CVPixelBufferRetain(image_buffer);
 }
 
 int ff_vda_create_decoder(struct vda_context *vda_ctx,
                           uint8_t *extradata,
                           int extradata_size)
 {
-    OSStatus status = kVDADecoderNoErr;
+    OSStatus status;
     CFNumberRef height;
     CFNumberRef width;
     CFNumberRef format;
@@ -147,8 +61,6 @@ int ff_vda_create_decoder(struct vda_context *vda_ctx,
     vda_ctx->bitstream = NULL;
     vda_ctx->ref_size = 0;
 
-    pthread_mutex_init(&vda_ctx->queue_mutex, NULL);
-
     /* Each VCL NAL in the bistream sent to the decoder
      * is preceded by a 4 bytes length header.
      * Change the avcC atom header if needed, to signal headers of 4 bytes. */
@@ -216,10 +128,7 @@ int ff_vda_create_decoder(struct vda_context *vda_ctx,
     CFRelease(cv_pix_fmt);
     CFRelease(buffer_attributes);
 
-    if (kVDADecoderNoErr != status)
-        return status;
-
-    return 0;
+    return status;
 }
 
 int ff_vda_destroy_decoder(struct vda_context *vda_ctx)
@@ -229,61 +138,28 @@ int ff_vda_destroy_decoder(struct vda_context *vda_ctx)
     if (vda_ctx->decoder)
         status = VDADecoderDestroy(vda_ctx->decoder);
 
-    vda_clear_queue(vda_ctx);
-
-    pthread_mutex_destroy(&vda_ctx->queue_mutex);
-
     if (vda_ctx->bitstream)
         av_freep(&vda_ctx->bitstream);
 
-    if (kVDADecoderNoErr != status)
-        return status;
-
-    return 0;
+    return status;
 }
 
-vda_frame *ff_vda_queue_pop(struct vda_context *vda_ctx)
+int ff_vda_decoder_decode(struct vda_context *vda_ctx)
 {
-    vda_frame *top_frame;
-
-    if (!vda_ctx->queue)
-        return NULL;
-
-    pthread_mutex_lock(&vda_ctx->queue_mutex);
-    top_frame = vda_ctx->queue;
-    vda_ctx->queue = top_frame->next_frame;
-    pthread_mutex_unlock(&vda_ctx->queue_mutex);
-
-    return top_frame;
-}
-
-void ff_vda_release_vda_frame(vda_frame *frame)
-{
-    if (frame) {
-        CVPixelBufferRelease(frame->cv_buffer);
-        av_freep(&frame);
-    }
-}
-
-int ff_vda_decoder_decode(struct vda_context *vda_ctx,
-                          uint8_t *bitstream,
-                          int bitstream_size,
-                          int64_t frame_pts)
-{
-    OSStatus status = kVDADecoderNoErr;
-    CFDictionaryRef user_info;
+    OSStatus status;
     CFDataRef coded_frame;
+    uint32_t flush_flags = 1 << 0; ///< kVDADecoderFlush_emitFrames
 
-    coded_frame = CFDataCreate(kCFAllocatorDefault, bitstream, bitstream_size);
-    user_info = vda_dictionary_with_pts(frame_pts);
+    coded_frame = CFDataCreate(kCFAllocatorDefault,
+                               vda_ctx->bitstream,
+                               vda_ctx->bitstream_size);
 
-    status = VDADecoderDecode(vda_ctx->decoder, 0, coded_frame, user_info);
+    status = VDADecoderDecode(vda_ctx->decoder, 0, coded_frame, NULL);
 
-    CFRelease(user_info);
-    CFRelease(coded_frame);
+    if (kVDADecoderNoErr == status)
+        status = VDADecoderFlush(vda_ctx->decoder, flush_flags);
 
-    if (kVDADecoderNoErr != status)
-        return status;
+    CFRelease(coded_frame);
 
-    return 0;
+    return status;
 }
diff --git a/libavcodec/vda.h b/libavcodec/vda.h
index 4ea0e9f..242cdbc 100644
--- a/libavcodec/vda.h
+++ b/libavcodec/vda.h
@@ -29,7 +29,6 @@
  * Public libavcodec VDA header.
  */
 
-#include <pthread.h>
 #include <stdint.h>
 
 // emmintrin.h is unable to compile with -std=c99 -Werror=missing-prototypes
@@ -48,35 +47,6 @@
  */
 
 /**
- *  This structure is used to store a decoded frame information and data.
- */
-typedef struct {
-    /**
-    * The PTS of the frame.
-    *
-    * - encoding: unused
-    * - decoding: Set/Unset by libavcodec.
-    */
-    int64_t             pts;
-
-    /**
-    * The CoreVideo buffer that contains the decoded data.
-    *
-    * - encoding: unused
-    * - decoding: Set/Unset by libavcodec.
-    */
-    CVPixelBufferRef    cv_buffer;
-
-    /**
-    * A pointer to the next frame.
-    *
-    * - encoding: unused
-    * - decoding: Set/Unset by libavcodec.
-    */
-    struct vda_frame    *next_frame;
-} vda_frame;
-
-/**
  * This structure is used to provide the necessary configurations and data
  * to the VDA FFmpeg HWAccel implementation.
  *
@@ -92,20 +62,12 @@ struct vda_context {
     VDADecoder          decoder;
 
     /**
-    * VDA frames queue ordered by presentation timestamp.
+    * The Core Video pixel buffer that contains the current image data.
     *
     * - encoding: unused
-    * - decoding: Set/Unset by libavcodec.
+    * - decoding: Set by libavcodec. Unset by user.
     */
-    vda_frame           *queue;
-
-    /**
-    * Mutex for locking queue operations.
-    *
-    * - encoding: unused
-    * - decoding: Set/Unset by libavcodec.
-    */
-    pthread_mutex_t     queue_mutex;
+    CVPixelBufferRef    cv_buffer;
 
     /**
     * The frame width.
@@ -172,12 +134,6 @@ int ff_vda_create_decoder(struct vda_context *vda_ctx,
 /** Destroy the video decoder. */
 int ff_vda_destroy_decoder(struct vda_context *vda_ctx);
 
-/** Return the top frame of the queue. */
-vda_frame *ff_vda_queue_pop(struct vda_context *vda_ctx);
-
-/** Release the given frame. */
-void ff_vda_release_vda_frame(vda_frame *frame);
-
 /**
  * @}
  */
diff --git a/libavcodec/vda_h264.c b/libavcodec/vda_h264.c
index 254268f..b1ff0a2 100644
--- a/libavcodec/vda_h264.c
+++ b/libavcodec/vda_h264.c
@@ -47,7 +47,9 @@ static int decode_slice(AVCodecContext *avctx,
     if (!vda_ctx->decoder)
         return -1;
 
-    tmp = av_fast_realloc(vda_ctx->bitstream, &vda_ctx->ref_size, vda_ctx->bitstream_size+size+4);
+    tmp = av_fast_realloc(vda_ctx->bitstream,
+                          &vda_ctx->ref_size,
+                          vda_ctx->bitstream_size+size+4);
     if (!tmp)
         return AVERROR(ENOMEM);
 
@@ -71,13 +73,12 @@ static int end_frame(AVCodecContext *avctx)
     if (!vda_ctx->decoder || !vda_ctx->bitstream)
         return -1;
 
-    status = ff_vda_decoder_decode(vda_ctx, vda_ctx->bitstream,
-                                   vda_ctx->bitstream_size,
-                                   frame->reordered_opaque);
-
+    status = ff_vda_decoder_decode(vda_ctx);
     if (status)
         av_log(avctx, AV_LOG_ERROR, "Failed to decode frame (%d)\n", status);
 
+    frame->data[3] = (void*)vda_ctx->cv_buffer;
+
     return status;
 }
 
@@ -89,5 +90,4 @@ AVHWAccel ff_h264_vda_hwaccel = {
     .start_frame    = start_frame,
     .decode_slice   = decode_slice,
     .end_frame      = end_frame,
-    .priv_data_size = 0,
 };
diff --git a/libavcodec/vda_internal.h b/libavcodec/vda_internal.h
index df7305b..e503d3c 100644
--- a/libavcodec/vda_internal.h
+++ b/libavcodec/vda_internal.h
@@ -32,10 +32,7 @@
  */
 
 /** Send frame data to the hardware decoder. */
-int ff_vda_decoder_decode(struct vda_context *vda_ctx,
-                          uint8_t *bitstream,
-                          int bitstream_size,
-                          int64_t frame_pts);
+int ff_vda_decoder_decode(struct vda_context *vda_ctx);
 
 /* @} */
 
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 1f2af62..a5e7fed 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -27,7 +27,7 @@
  */
 
 #define LIBAVCODEC_VERSION_MAJOR 54
-#define LIBAVCODEC_VERSION_MINOR 50
+#define LIBAVCODEC_VERSION_MINOR 51
 #define LIBAVCODEC_VERSION_MICRO 100
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
index fa771bc..bee8cb2 100644
--- a/libavutil/pixfmt.h
+++ b/libavutil/pixfmt.h
@@ -156,7 +156,7 @@ enum PixelFormat {
     PIX_FMT_YUV444P10LE,///< planar YUV 4:4:4, 30bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian
     PIX_FMT_YUV422P9BE, ///< planar YUV 4:2:2, 18bpp, (1 Cr & Cb sample per 2x1 Y samples), big-endian
     PIX_FMT_YUV422P9LE, ///< planar YUV 4:2:2, 18bpp, (1 Cr & Cb sample per 2x1 Y samples), little-endian
-    PIX_FMT_VDA_VLD,    ///< hardware decoding through VDA
+    PIX_FMT_VDA_VLD,    ///< hardware decoding through VDA, Picture.data[3] contains a CVPixelBufferRef object
 
 #ifdef AV_PIX_FMT_ABI_GIT_MASTER
     PIX_FMT_RGBA64BE,  ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian
-- 
1.7.5.4