[FFmpeg-devel] [PATCH] Add VDA support to ffmpeg

Xunzhen Quan quanxunzhen at gmail.com
Wed Jul 18 15:27:52 CEST 2012


Hi!

The attached patch for ffmpeg adds VDA (Video Decode Acceleration framework)
support so that ffmpeg and mplayer will be able to play H.264 video with
hardware acceleration on OS X.

This code seems to work well for me, but I think it may need more testing.

I'm new here. I found that it is suggested to attach patches with a text/*
MIME type, but I don't know how to change the MIME type of an attachment in
Gmail, so I changed the file extension instead. Is that OK?

Xidorn Quan
-------------- next part --------------
From 0170d73abcff06932065523ad3bb94b1c68c92c7 Mon Sep 17 00:00:00 2001
From: upsuper <quanxunzhen at gmail.com>
Date: Wed, 18 Jul 2012 17:31:03 +0800
Subject: [PATCH 1/2] add h264 vda support

---
 libavcodec/allcodecs.c    |   1 +
 libavcodec/vda.c          |  10 +++
 libavcodec/vda.h          |   4 +-
 libavcodec/vda_h264.c     | 211 ++++++++++++++++++++++++++++++++++++++++++++++
 libavcodec/vda_internal.h |   2 +
 5 files changed, 226 insertions(+), 2 deletions(-)

diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 3b18f57..c301ee7 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -132,6 +132,7 @@ void avcodec_register_all(void)
     REGISTER_DECODER (H264, h264);
     REGISTER_DECODER (H264_CRYSTALHD, h264_crystalhd);
     REGISTER_DECODER (H264_VDPAU, h264_vdpau);
+    REGISTER_DECODER (H264_VDA, h264_vda);
     REGISTER_ENCDEC  (HUFFYUV, huffyuv);
     REGISTER_DECODER (IDCIN, idcin);
     REGISTER_DECODER (IFF_BYTERUN1, iff_byterun1);
diff --git a/libavcodec/vda.c b/libavcodec/vda.c
index d3ced6c..c56a1f6 100644
--- a/libavcodec/vda.c
+++ b/libavcodec/vda.c
@@ -26,6 +26,7 @@
 #include <CoreFoundation/CFData.h>
 #include <CoreFoundation/CFString.h>
 
+#include "avcodec.h"
 #include "libavutil/avutil.h"
 #include "vda_internal.h"
 
@@ -287,3 +288,12 @@ int ff_vda_decoder_decode(struct vda_context *vda_ctx,
 
     return 0;
 }
+
+int ff_vda_decoder_flush(struct vda_context *vda_ctx)
+{
+    OSStatus status;
+    status = VDADecoderFlush(vda_ctx->decoder, 0);
+    if (kVDADecoderNoErr != status)
+        return status;
+    return 0;
+}
diff --git a/libavcodec/vda.h b/libavcodec/vda.h
index 4ea0e9f..8db16cb 100644
--- a/libavcodec/vda.h
+++ b/libavcodec/vda.h
@@ -50,7 +50,7 @@
 /**
  *  This structure is used to store a decoded frame information and data.
  */
-typedef struct {
+typedef struct _vda_frame {
     /**
     * The PTS of the frame.
     *
@@ -73,7 +73,7 @@ typedef struct {
     * - encoding: unused
     * - decoding: Set/Unset by libavcodec.
     */
-    struct vda_frame    *next_frame;
+    struct _vda_frame    *next_frame;
 } vda_frame;
 
 /**
diff --git a/libavcodec/vda_h264.c b/libavcodec/vda_h264.c
index 254268f..2e7811c 100644
--- a/libavcodec/vda_h264.c
+++ b/libavcodec/vda_h264.c
@@ -21,6 +21,8 @@
  */
 
 #include "h264.h"
+#include "golomb.h"
+#include "libavutil/imgutils.h"
 #include "vda_internal.h"
 
 static int start_frame(AVCodecContext *avctx,
@@ -91,3 +93,212 @@ AVHWAccel ff_h264_vda_hwaccel = {
     .end_frame      = end_frame,
     .priv_data_size = 0,
 };
+
+typedef struct {
+    struct vda_context v;
+    AVCodecParserContext *parser;
+    AVFrame pic;
+    int idr_order;
+} H264VDAContext;
+
+static av_cold int init(AVCodecContext *avctx)
+{
+    H264VDAContext *c = avctx->priv_data;
+    struct vda_context *v = &c->v;
+    int ret;
+
+    v->width = avctx->width;
+    v->height = avctx->height;
+    av_log(avctx, AV_LOG_INFO, "codec_tag: %x\n", avctx->codec_tag);
+    // FIXME might should be avctx->codec_tag,
+    // but if we don't set it as 'avc1', vda would refuse to decode
+    v->format = 'avc1'; // avctx->codec_tag;
+    v->cv_pix_fmt_type = kCVPixelFormatType_422YpCbCr8_yuvs;
+    avctx->pix_fmt = PIX_FMT_YUYV422;
+
+    ret = ff_vda_create_decoder(v, avctx->extradata, avctx->extradata_size);
+    if (ret) {
+        av_log(avctx, AV_LOG_ERROR, "init decoder failed: %d\n", ret);
+        return -1;
+    }
+
+    c->parser = av_parser_init(avctx->codec->id);
+    if (!c->parser) {
+        av_log(avctx, AV_LOG_ERROR, "cannot open h.264 parser!\n");
+        return -1;
+    }
+    c->parser->flags = PARSER_FLAG_COMPLETE_FRAMES;
+    c->idr_order = 0;
+
+    return 0;
+}
+
+static av_cold int uninit(AVCodecContext *avctx)
+{
+    H264VDAContext *c = avctx->priv_data;
+    struct vda_context *v = &c->v;
+    int ret;
+
+    if (c->pic.data[0])
+        avctx->release_buffer(avctx, &c->pic);
+
+    av_parser_close(c->parser);
+
+    ret = ff_vda_destroy_decoder(v);
+    if (ret) {
+        av_log(avctx, AV_LOG_ERROR, "uninit decoder failed: %d\n", ret);
+        return -1;
+    }
+
+    return 0;
+}
+
+static uint64_t get_frame_poc(AVCodecContext *avctx, uint8_t *buf, int len)
+{
+    H264VDAContext *c = avctx->priv_data;
+    struct vda_context *v = &c->v;
+    H264Context *h = c->parser->priv_data;
+    int top_foc = INT_MAX,
+        bottom_foc = INT_MAX;
+    int poc;
+    uint8_t *pout;
+    int psize;
+    int index;
+
+    index = av_parser_parse2(c->parser, avctx, &pout, &psize, buf, len,
+            avctx->pkt->pts, avctx->pkt->dts, 0);
+
+    if (h->nal_unit_type == NAL_IDR_SLICE) {
+        get_ue_golomb_long(&h->s.gb); // skip idr_pic_id
+        c->idr_order++;
+    }
+    if (h->sps.poc_type == 0) {
+
+        if (h->nal_unit_type == NAL_IDR_SLICE) {
+            h->prev_poc_msb = h->prev_poc_lsb = 0;
+        } else {
+            // FIXME here we assume there are no picture which
+            // included a memory_management_control_operation equal to 5
+            h->prev_poc_msb = h->poc_msb;
+            h->prev_poc_lsb = h->poc_lsb;
+        }
+
+        int max_poc_lsb = 1 << h->sps.log2_max_poc_lsb;
+        h->poc_lsb = get_bits(&h->s.gb, h->sps.log2_max_poc_lsb);
+        if (h->pps.pic_order_present && h->s.picture_structure == PICT_FRAME)
+            h->delta_poc_bottom = get_se_golomb(&h->s.gb);
+
+        if (h->poc_lsb < h->prev_poc_lsb &&
+                h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb / 2)
+            h->poc_msb = h->prev_poc_msb + max_poc_lsb;
+        else if (h->poc_lsb > h->prev_poc_lsb &&
+                h->poc_lsb - h->prev_poc_lsb >= max_poc_lsb / 2)
+            h->poc_msb = h->prev_poc_msb - max_poc_lsb;
+
+        if (h->s.picture_structure != PICT_BOTTOM_FIELD)
+            top_foc = h->poc_msb + h->poc_lsb;
+        if (h->s.picture_structure != PICT_FRAME)
+            bottom_foc = top_foc + h->delta_poc_bottom;
+        else if (h->s.picture_structure == PICT_BOTTOM_FIELD)
+            bottom_foc = h->poc_msb + h->poc_lsb;
+    } else if (h->sps.poc_type == 1) {
+        // FIXME
+    } else if (h->sps.poc_type == 2) {
+        // FIXME
+    }
+
+    return ((uint64_t)c->idr_order << 32) | FFMIN(top_foc, bottom_foc);
+}
+
+static int decode(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt)
+{
+    H264VDAContext *c = avctx->priv_data;
+    struct vda_context *v = &c->v;
+    vda_frame *frame;
+    int width, height, bwidth;
+    CVPixelBufferRef buffer;
+    int ret, len = avpkt->size;
+    uint8_t *buf = avpkt->data;
+
+    if (len) {
+        uint64_t frame_poc = get_frame_poc(avctx, buf, len);
+        ret = ff_vda_decoder_decode(v, buf, len, frame_poc);
+        if (ret) {
+            av_log(avctx, AV_LOG_ERROR, "decoder decode failed: %d\n", ret);
+            return -1;
+        }
+        avctx->has_b_frames++;
+    }
+
+    frame = ff_vda_queue_pop(v);
+    if (frame) {
+        if (c->pic.data[0])
+            avctx->release_buffer(avctx, &c->pic);
+        if (avctx->get_buffer(avctx, &c->pic) < 0) {
+            av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+            ff_vda_release_vda_frame(frame);
+            return -1;
+        }
+
+        buffer = frame->cv_buffer;
+        width = CVPixelBufferGetWidth(buffer);
+        height = CVPixelBufferGetHeight(buffer);
+        bwidth = av_image_get_linesize(avctx->pix_fmt, width, 0);
+
+        ret = CVPixelBufferLockBaseAddress(buffer, 0);
+        if (ret) {
+            av_log(avctx, AV_LOG_ERROR, "lock base address failed: %d\n", ret);
+            ff_vda_release_vda_frame(frame);
+            return -1;
+        }
+        av_image_copy_plane(c->pic.data[0], c->pic.linesize[0],
+                CVPixelBufferGetBaseAddress(buffer), bwidth, bwidth, height);
+        CVPixelBufferUnlockBaseAddress(buffer, 0);
+
+        *data_size = sizeof(AVFrame);
+        *(AVFrame *)data = c->pic;
+        if (*data_size > 0)
+            avctx->has_b_frames--;
+
+        ff_vda_release_vda_frame(frame);
+    }
+    return len;
+}
+
+static void flush(AVCodecContext *avctx)
+{
+    H264VDAContext *c = avctx->priv_data;
+    struct vda_context *v = &c->v;
+    vda_frame *frame;
+
+    ff_vda_decoder_flush(v);
+    while ((frame = ff_vda_queue_pop(v)) != NULL)
+        ff_vda_release_vda_frame(frame);
+}
+
+#if CONFIG_H264_VDA_DECODER
+static const AVOption options[] = {
+    { NULL, },
+};
+
+static AVClass h264_vda_class = {
+    "H264 VDA Decoder",
+    av_default_item_name,
+    options,
+    LIBAVUTIL_VERSION_INT,
+};
+
+AVCodec ff_h264_vda_decoder = {
+    .name           = "h264_vda",
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = CODEC_ID_H264,
+    .priv_data_size = sizeof(H264VDAContext),
+    .init           = init,
+    .close          = uninit,
+    .decode         = decode,
+    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY,
+    .flush          = flush,
+    .long_name      = NULL_IF_CONFIG_SMALL("H.264 (VDA acceleration)"),
+    .priv_class     = &h264_vda_class
+};
+#endif
diff --git a/libavcodec/vda_internal.h b/libavcodec/vda_internal.h
index df7305b..c522801 100644
--- a/libavcodec/vda_internal.h
+++ b/libavcodec/vda_internal.h
@@ -37,6 +37,8 @@ int ff_vda_decoder_decode(struct vda_context *vda_ctx,
                           int bitstream_size,
                           int64_t frame_pts);
 
+int ff_vda_decoder_flush(struct vda_context *vda_ctx);
+
 /* @} */
 
 #endif /* AVCODEC_VDA_INTERNAL_H */
-- 
1.7.11.2


From 5d3a871543d8ff5102ab9132f5feb7d312a0cd1b Mon Sep 17 00:00:00 2001
From: upsuper <quanxunzhen at gmail.com>
Date: Wed, 18 Jul 2012 20:55:57 +0800
Subject: [PATCH 2/2] modify some code

---
 libavcodec/vda.c      | 2 +-
 libavcodec/vda.h      | 4 ++--
 libavcodec/vda_h264.c | 7 +++----
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/libavcodec/vda.c b/libavcodec/vda.c
index c56a1f6..7f0ba67 100644
--- a/libavcodec/vda.c
+++ b/libavcodec/vda.c
@@ -26,9 +26,9 @@
 #include <CoreFoundation/CFData.h>
 #include <CoreFoundation/CFString.h>
 
+#include "vda_internal.h"
 #include "avcodec.h"
 #include "libavutil/avutil.h"
-#include "vda_internal.h"
 
 /* Helper to create a dictionary according to the given pts. */
 static CFDictionaryRef vda_dictionary_with_pts(int64_t i_pts)
diff --git a/libavcodec/vda.h b/libavcodec/vda.h
index 8db16cb..f9a9b0d 100644
--- a/libavcodec/vda.h
+++ b/libavcodec/vda.h
@@ -50,7 +50,7 @@
 /**
  *  This structure is used to store a decoded frame information and data.
  */
-typedef struct _vda_frame {
+typedef struct vda_frame {
     /**
     * The PTS of the frame.
     *
@@ -73,7 +73,7 @@ typedef struct _vda_frame {
     * - encoding: unused
     * - decoding: Set/Unset by libavcodec.
     */
-    struct _vda_frame    *next_frame;
+    struct vda_frame    *next_frame;
 } vda_frame;
 
 /**
diff --git a/libavcodec/vda_h264.c b/libavcodec/vda_h264.c
index 2e7811c..83c43b7 100644
--- a/libavcodec/vda_h264.c
+++ b/libavcodec/vda_h264.c
@@ -20,10 +20,10 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "vda_internal.h"
 #include "h264.h"
 #include "golomb.h"
 #include "libavutil/imgutils.h"
-#include "vda_internal.h"
 
 static int start_frame(AVCodecContext *avctx,
                        av_unused const uint8_t *buffer,
@@ -156,11 +156,9 @@ static av_cold int uninit(AVCodecContext *avctx)
 static uint64_t get_frame_poc(AVCodecContext *avctx, uint8_t *buf, int len)
 {
     H264VDAContext *c = avctx->priv_data;
-    struct vda_context *v = &c->v;
     H264Context *h = c->parser->priv_data;
     int top_foc = INT_MAX,
         bottom_foc = INT_MAX;
-    int poc;
     uint8_t *pout;
     int psize;
     int index;
@@ -173,6 +171,7 @@ static uint64_t get_frame_poc(AVCodecContext *avctx, uint8_t *buf, int len)
         c->idr_order++;
     }
     if (h->sps.poc_type == 0) {
+        int max_poc_lsb;
 
         if (h->nal_unit_type == NAL_IDR_SLICE) {
             h->prev_poc_msb = h->prev_poc_lsb = 0;
@@ -183,7 +182,7 @@ static uint64_t get_frame_poc(AVCodecContext *avctx, uint8_t *buf, int len)
             h->prev_poc_lsb = h->poc_lsb;
         }
 
-        int max_poc_lsb = 1 << h->sps.log2_max_poc_lsb;
+        max_poc_lsb = 1 << h->sps.log2_max_poc_lsb;
         h->poc_lsb = get_bits(&h->s.gb, h->sps.log2_max_poc_lsb);
         if (h->pps.pic_order_present && h->s.picture_structure == PICT_FRAME)
             h->delta_poc_bottom = get_se_golomb(&h->s.gb);
-- 
1.7.11.2


More information about the ffmpeg-devel mailing list