[FFmpeg-cvslog] videotoolbox: add hwcontext support

Mon May 15 12:39:03 EEST 2017

ffmpeg | branch: master | wm4 <nfxjfg at googlemail.com> | Mon May 15 11:27:24 2017 +0200| [532b23f079b52f4789be1f20ce232286ce4ffa13] | committer: wm4

videotoolbox: add hwcontext support

This adds tons of code for no other benefit than making VideoToolbox
support conform with the new hwaccel API (using hw_device_ctx and
hw_frames_ctx).

Since VideoToolbox decoding does not actually require the user to
allocate frames, the new code does mostly nothing.

One benefit is that ffmpeg_videotoolbox.c can be dropped once generic
hwaccel support for ffmpeg.c is merged from Libav.

Does not consider VDA or VideoToolbox encoding.

Fun fact: the frame transfer functions are copied from vaapi, as the
mapping makes copying generic boilerplate. Mapping itself is not
exported by the VT code, because I don't know how to test.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=532b23f079b52f4789be1f20ce232286ce4ffa13
---

 doc/APIchanges                     |   8 ++
 libavcodec/vda_vt_internal.h       |   7 ++
 libavcodec/version.h               |   4 +-
 libavcodec/videotoolbox.c          | 180 +++++++++++++++++++++++++--
 libavutil/Makefile                 |   3 +
 libavutil/hwcontext.c              |   3 +
 libavutil/hwcontext.h              |   1 +
 libavutil/hwcontext_internal.h     |   1 +
 libavutil/hwcontext_videotoolbox.c | 243 +++++++++++++++++++++++++++++++++++++
 libavutil/hwcontext_videotoolbox.h |  54 +++++++++
 libavutil/version.h                |   2 +-
 11 files changed, 493 insertions(+), 13 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index 09b1a49798..67a6142401 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -15,6 +15,14 @@ libavutil:     2015-08-28
 
 API changes, most recent first:
 
+2017-05-15 - xxxxxxxxxx - lavc 57.96.100 - avcodec.h
+  VideoToolbox hardware-accelerated decoding now supports the new hwaccel API,
+  which can create the decoder context and allocate hardware frames automatically.
+  See AVCodecContext.hw_device_ctx and AVCodecContext.hw_frames_ctx.
+
+2017-05-15 - xxxxxxxxxx - lavu 57.63.100 - hwcontext.h
+  Add AV_HWDEVICE_TYPE_VIDEOTOOLBOX and implementation.
+
 2017-xx-xx - xxxxxxx - lavc 57.95.100 / 57.31.0 - avcodec.h
   Add AVCodecContext.apply_cropping to control whether cropping
   is handled by libavcodec or the caller.
diff --git a/libavcodec/vda_vt_internal.h b/libavcodec/vda_vt_internal.h
index 9ff63ccc52..e55a813899 100644
--- a/libavcodec/vda_vt_internal.h
+++ b/libavcodec/vda_vt_internal.h
@@ -40,6 +40,13 @@ typedef struct VTContext {
 
     // The core video buffer
     CVImageBufferRef            frame;
+
+    // Current dummy frames context (depends on exact CVImageBufferRef params).
+    struct AVBufferRef         *cached_hw_frames_ctx;
+
+    // Non-NULL if the new hwaccel API is used. This is only a separate struct
+    // to ease compatibility with the old API.
+    struct AVVideotoolboxContext *vt_ctx;
 } VTContext;
 
 int ff_videotoolbox_alloc_frame(AVCodecContext *avctx, AVFrame *frame);
diff --git a/libavcodec/version.h b/libavcodec/version.h
index f0b74fc3b4..a26e93974c 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -28,8 +28,8 @@
 #include "libavutil/version.h"
 
 #define LIBAVCODEC_VERSION_MAJOR  57
-#define LIBAVCODEC_VERSION_MINOR  95
-#define LIBAVCODEC_VERSION_MICRO 101
+#define LIBAVCODEC_VERSION_MINOR  96
+#define LIBAVCODEC_VERSION_MICRO 100
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
                                                LIBAVCODEC_VERSION_MINOR, \
diff --git a/libavcodec/videotoolbox.c b/libavcodec/videotoolbox.c
index 67adad53ed..d36a33efcd 100644
--- a/libavcodec/videotoolbox.c
+++ b/libavcodec/videotoolbox.c
@@ -23,11 +23,13 @@
 #include "config.h"
 #if CONFIG_VIDEOTOOLBOX
 #  include "videotoolbox.h"
+#  include "libavutil/hwcontext_videotoolbox.h"
 #else
 #  include "vda.h"
 #endif
 #include "vda_vt_internal.h"
 #include "libavutil/avutil.h"
+#include "libavutil/hwcontext.h"
 #include "bytestream.h"
 #include "h264dec.h"
 #include "mpegvideo.h"
@@ -188,6 +190,73 @@ int ff_videotoolbox_uninit(AVCodecContext *avctx)
 }
 
 #if CONFIG_VIDEOTOOLBOX
+// Return the AVVideotoolboxContext that matters currently. Where it comes from
+// depends on the API used.
+static AVVideotoolboxContext *videotoolbox_get_context(AVCodecContext *avctx)
+{
+    // Somewhat tricky because the user can call av_videotoolbox_default_free()
+    // at any time, even when the codec is closed.
+    if (avctx->internal && avctx->internal->hwaccel_priv_data) {
+        VTContext *vtctx = avctx->internal->hwaccel_priv_data;
+        if (vtctx->vt_ctx)
+            return vtctx->vt_ctx;
+    }
+    return avctx->hwaccel_context;
+}
+
+static int videotoolbox_buffer_create(AVCodecContext *avctx, AVFrame *frame)
+{
+    VTContext *vtctx = avctx->internal->hwaccel_priv_data;
+    CVPixelBufferRef pixbuf = (CVPixelBufferRef)vtctx->frame;
+    OSType pixel_format = CVPixelBufferGetPixelFormatType(pixbuf);
+    enum AVPixelFormat sw_format = av_map_videotoolbox_format_to_pixfmt(pixel_format);
+    int width = CVPixelBufferGetWidth(pixbuf);
+    int height = CVPixelBufferGetHeight(pixbuf);
+    AVHWFramesContext *cached_frames;
+    int ret;
+
+    ret = ff_videotoolbox_buffer_create(vtctx, frame);
+    if (ret < 0)
+        return ret;
+
+    // Old API code path.
+    if (!vtctx->cached_hw_frames_ctx)
+        return 0;
+
+    cached_frames = (AVHWFramesContext*)vtctx->cached_hw_frames_ctx->data;
+
+    if (cached_frames->sw_format != sw_format ||
+        cached_frames->width != width ||
+        cached_frames->height != height) {
+        AVBufferRef *hw_frames_ctx = av_hwframe_ctx_alloc(cached_frames->device_ref);
+        AVHWFramesContext *hw_frames;
+        if (!hw_frames_ctx)
+            return AVERROR(ENOMEM);
+
+        hw_frames = (AVHWFramesContext*)hw_frames_ctx->data;
+        hw_frames->format = cached_frames->format;
+        hw_frames->sw_format = sw_format;
+        hw_frames->width = width;
+        hw_frames->height = height;
+
+        ret = av_hwframe_ctx_init(hw_frames_ctx);
+        if (ret < 0) {
+            av_buffer_unref(&hw_frames_ctx);
+            return ret;
+        }
+
+        av_buffer_unref(&vtctx->cached_hw_frames_ctx);
+        vtctx->cached_hw_frames_ctx = hw_frames_ctx;
+    }
+
+    av_assert0(!frame->hw_frames_ctx);
+    frame->hw_frames_ctx = av_buffer_ref(vtctx->cached_hw_frames_ctx);
+    if (!frame->hw_frames_ctx)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
 static void videotoolbox_write_mp4_descr_length(PutByteContext *pb, int length)
 {
     int i;
@@ -323,7 +392,7 @@ static OSStatus videotoolbox_session_decode_frame(AVCodecContext *avctx)
 {
     OSStatus status;
     CMSampleBufferRef sample_buf;
-    AVVideotoolboxContext *videotoolbox = avctx->hwaccel_context;
+    AVVideotoolboxContext *videotoolbox = videotoolbox_get_context(avctx);
     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
 
     sample_buf = videotoolbox_sample_buffer_create(videotoolbox->cm_fmt_desc,
@@ -349,7 +418,7 @@ static OSStatus videotoolbox_session_decode_frame(AVCodecContext *avctx)
 static int videotoolbox_common_end_frame(AVCodecContext *avctx, AVFrame *frame)
 {
     int status;
-    AVVideotoolboxContext *videotoolbox = avctx->hwaccel_context;
+    AVVideotoolboxContext *videotoolbox = videotoolbox_get_context(avctx);
     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
 
     if (!videotoolbox->session || !vtctx->bitstream)
@@ -365,7 +434,7 @@ static int videotoolbox_common_end_frame(AVCodecContext *avctx, AVFrame *frame)
     if (!vtctx->frame)
         return AVERROR_UNKNOWN;
 
-    return ff_videotoolbox_buffer_create(vtctx, frame);
+    return videotoolbox_buffer_create(avctx, frame);
 }
 
 static int videotoolbox_h264_end_frame(AVCodecContext *avctx)
@@ -513,7 +582,7 @@ static CMVideoFormatDescriptionRef videotoolbox_format_desc_create(CMVideoCodecT
 
 static int videotoolbox_default_init(AVCodecContext *avctx)
 {
-    AVVideotoolboxContext *videotoolbox = avctx->hwaccel_context;
+    AVVideotoolboxContext *videotoolbox = videotoolbox_get_context(avctx);
     OSStatus status;
     VTDecompressionOutputCallbackRecord decoder_cb;
     CFDictionaryRef decoder_spec;
@@ -594,7 +663,7 @@ static int videotoolbox_default_init(AVCodecContext *avctx)
 
 static void videotoolbox_default_free(AVCodecContext *avctx)
 {
-    AVVideotoolboxContext *videotoolbox = avctx->hwaccel_context;
+    AVVideotoolboxContext *videotoolbox = videotoolbox_get_context(avctx);
 
     if (videotoolbox) {
         if (videotoolbox->cm_fmt_desc)
@@ -607,6 +676,92 @@ static void videotoolbox_default_free(AVCodecContext *avctx)
     }
 }
 
+static int videotoolbox_uninit(AVCodecContext *avctx)
+{
+    VTContext *vtctx = avctx->internal->hwaccel_priv_data;
+    if (!vtctx)
+        return 0;
+
+    ff_videotoolbox_uninit(avctx);
+
+    if (vtctx->vt_ctx)
+        videotoolbox_default_free(avctx);
+
+    av_buffer_unref(&vtctx->cached_hw_frames_ctx);
+    av_freep(&vtctx->vt_ctx);
+
+    return 0;
+}
+
+static int videotoolbox_common_init(AVCodecContext *avctx)
+{
+    VTContext *vtctx = avctx->internal->hwaccel_priv_data;
+    AVHWFramesContext *hw_frames;
+    int err;
+
+    // Old API - do nothing.
+    if (avctx->hwaccel_context)
+        return 0;
+
+    if (!avctx->hw_frames_ctx && !avctx->hw_device_ctx) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Either hw_frames_ctx or hw_device_ctx must be set.\n");
+        return AVERROR(EINVAL);
+    }
+
+    vtctx->vt_ctx = av_videotoolbox_alloc_context();
+    if (!vtctx->vt_ctx) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    if (avctx->hw_frames_ctx) {
+        hw_frames = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
+    } else {
+        avctx->hw_frames_ctx = av_hwframe_ctx_alloc(avctx->hw_device_ctx);
+        if (!avctx->hw_frames_ctx) {
+            err = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        hw_frames = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
+        hw_frames->format = AV_PIX_FMT_VIDEOTOOLBOX;
+        hw_frames->sw_format = AV_PIX_FMT_NV12; // same as av_videotoolbox_alloc_context()
+        hw_frames->width = avctx->width;
+        hw_frames->height = avctx->height;
+
+        err = av_hwframe_ctx_init(avctx->hw_frames_ctx);
+        if (err < 0) {
+            av_buffer_unref(&avctx->hw_frames_ctx);
+            goto fail;
+        }
+    }
+
+    vtctx->cached_hw_frames_ctx = av_buffer_ref(avctx->hw_frames_ctx);
+    if (!vtctx->cached_hw_frames_ctx) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    vtctx->vt_ctx->cv_pix_fmt_type =
+        av_map_videotoolbox_format_from_pixfmt(hw_frames->sw_format);
+    if (!vtctx->vt_ctx->cv_pix_fmt_type) {
+        av_log(avctx, AV_LOG_ERROR, "Unknown sw_format.\n");
+        err = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    err = videotoolbox_default_init(avctx);
+    if (err < 0)
+        goto fail;
+
+    return 0;
+
+fail:
+    videotoolbox_uninit(avctx);
+    return err;
+}
+
 AVHWAccel ff_h263_videotoolbox_hwaccel = {
     .name           = "h263_videotoolbox",
     .type           = AVMEDIA_TYPE_VIDEO,
@@ -616,7 +771,8 @@ AVHWAccel ff_h263_videotoolbox_hwaccel = {
     .start_frame    = videotoolbox_mpeg_start_frame,
     .decode_slice   = videotoolbox_mpeg_decode_slice,
     .end_frame      = videotoolbox_mpeg_end_frame,
-    .uninit         = ff_videotoolbox_uninit,
+    .init           = videotoolbox_common_init,
+    .uninit         = videotoolbox_uninit,
     .priv_data_size = sizeof(VTContext),
 };
 
@@ -629,7 +785,8 @@ AVHWAccel ff_h264_videotoolbox_hwaccel = {
     .start_frame    = ff_videotoolbox_h264_start_frame,
     .decode_slice   = ff_videotoolbox_h264_decode_slice,
     .end_frame      = videotoolbox_h264_end_frame,
-    .uninit         = ff_videotoolbox_uninit,
+    .init           = videotoolbox_common_init,
+    .uninit         = videotoolbox_uninit,
     .priv_data_size = sizeof(VTContext),
 };
 
@@ -642,7 +799,8 @@ AVHWAccel ff_mpeg1_videotoolbox_hwaccel = {
     .start_frame    = videotoolbox_mpeg_start_frame,
     .decode_slice   = videotoolbox_mpeg_decode_slice,
     .end_frame      = videotoolbox_mpeg_end_frame,
-    .uninit         = ff_videotoolbox_uninit,
+    .init           = videotoolbox_common_init,
+    .uninit         = videotoolbox_uninit,
     .priv_data_size = sizeof(VTContext),
 };
 
@@ -655,7 +813,8 @@ AVHWAccel ff_mpeg2_videotoolbox_hwaccel = {
     .start_frame    = videotoolbox_mpeg_start_frame,
     .decode_slice   = videotoolbox_mpeg_decode_slice,
     .end_frame      = videotoolbox_mpeg_end_frame,
-    .uninit         = ff_videotoolbox_uninit,
+    .init           = videotoolbox_common_init,
+    .uninit         = videotoolbox_uninit,
     .priv_data_size = sizeof(VTContext),
 };
 
@@ -668,7 +827,8 @@ AVHWAccel ff_mpeg4_videotoolbox_hwaccel = {
     .start_frame    = videotoolbox_mpeg_start_frame,
     .decode_slice   = videotoolbox_mpeg_decode_slice,
     .end_frame      = videotoolbox_mpeg_end_frame,
-    .uninit         = ff_videotoolbox_uninit,
+    .init           = videotoolbox_common_init,
+    .uninit         = videotoolbox_uninit,
     .priv_data_size = sizeof(VTContext),
 };
 
diff --git a/libavutil/Makefile b/libavutil/Makefile
index 1fd8dca54b..92ee5bd0b3 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -36,6 +36,7 @@ HEADERS = adler32.h                                                     \
           hwcontext_dxva2.h                                             \
           hwcontext_qsv.h                                               \
           hwcontext_vaapi.h                                             \
+          hwcontext_videotoolbox.h                                      \
           hwcontext_vdpau.h                                             \
           imgutils.h                                                    \
           intfloat.h                                                    \
@@ -160,6 +161,7 @@ OBJS-$(CONFIG_QSV)                   += hwcontext_qsv.o
 OBJS-$(CONFIG_LZO)                      += lzo.o
 OBJS-$(CONFIG_OPENCL)                   += opencl.o opencl_internal.o
 OBJS-$(CONFIG_VAAPI)                    += hwcontext_vaapi.o
+OBJS-$(CONFIG_VIDEOTOOLBOX)             += hwcontext_videotoolbox.o
 OBJS-$(CONFIG_VDPAU)                    += hwcontext_vdpau.o
 
 OBJS += $(COMPAT_OBJS:%=../compat/%)
@@ -172,6 +174,7 @@ SKIPHEADERS-$(CONFIG_CUDA)             += hwcontext_cuda_internal.h
 SKIPHEADERS-$(CONFIG_DXVA2)            += hwcontext_dxva2.h
 SKIPHEADERS-$(CONFIG_QSV)           += hwcontext_qsv.h
 SKIPHEADERS-$(CONFIG_VAAPI)            += hwcontext_vaapi.h
+SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX)     += hwcontext_videotoolbox.h
 SKIPHEADERS-$(CONFIG_VDPAU)            += hwcontext_vdpau.h
 SKIPHEADERS-$(HAVE_ATOMICS_GCC)        += atomic_gcc.h
 SKIPHEADERS-$(HAVE_ATOMICS_SUNCC)      += atomic_suncc.h
diff --git a/libavutil/hwcontext.c b/libavutil/hwcontext.c
index 4cfe377982..8d50a32b84 100644
--- a/libavutil/hwcontext.c
+++ b/libavutil/hwcontext.c
@@ -44,6 +44,9 @@ static const HWContextType *hw_table[] = {
 #if CONFIG_VDPAU
     &ff_hwcontext_type_vdpau,
 #endif
+#if CONFIG_VIDEOTOOLBOX
+    &ff_hwcontext_type_videotoolbox,
+#endif
     NULL,
 };
 
diff --git a/libavutil/hwcontext.h b/libavutil/hwcontext.h
index 284b091209..cfc6ad0e28 100644
--- a/libavutil/hwcontext.h
+++ b/libavutil/hwcontext.h
@@ -30,6 +30,7 @@ enum AVHWDeviceType {
     AV_HWDEVICE_TYPE_VAAPI,
     AV_HWDEVICE_TYPE_DXVA2,
     AV_HWDEVICE_TYPE_QSV,
+    AV_HWDEVICE_TYPE_VIDEOTOOLBOX,
 };
 
 typedef struct AVHWDeviceInternal AVHWDeviceInternal;
diff --git a/libavutil/hwcontext_internal.h b/libavutil/hwcontext_internal.h
index 30fce2afd9..cf05323e15 100644
--- a/libavutil/hwcontext_internal.h
+++ b/libavutil/hwcontext_internal.h
@@ -144,5 +144,6 @@ extern const HWContextType ff_hwcontext_type_dxva2;
 extern const HWContextType ff_hwcontext_type_qsv;
 extern const HWContextType ff_hwcontext_type_vaapi;
 extern const HWContextType ff_hwcontext_type_vdpau;
+extern const HWContextType ff_hwcontext_type_videotoolbox;
 
 #endif /* AVUTIL_HWCONTEXT_INTERNAL_H */
diff --git a/libavutil/hwcontext_videotoolbox.c b/libavutil/hwcontext_videotoolbox.c
new file mode 100644
index 0000000000..cc00f1f2f2
--- /dev/null
+++ b/libavutil/hwcontext_videotoolbox.c
@@ -0,0 +1,243 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include <stdint.h>
+#include <string.h>
+
+#include <VideoToolbox/VideoToolbox.h>
+
+#include "buffer.h"
+#include "common.h"
+#include "hwcontext.h"
+#include "hwcontext_internal.h"
+#include "hwcontext_videotoolbox.h"
+#include "mem.h"
+#include "pixfmt.h"
+#include "pixdesc.h"
+
+static const struct {
+    uint32_t cv_fmt;
+    enum AVPixelFormat pix_fmt;
+} cv_pix_fmts[] = {
+    { kCVPixelFormatType_420YpCbCr8Planar,              AV_PIX_FMT_YUV420P },
+    { kCVPixelFormatType_422YpCbCr8,                    AV_PIX_FMT_UYVY422 },
+    { kCVPixelFormatType_32BGRA,                        AV_PIX_FMT_BGRA },
+#ifdef kCFCoreFoundationVersionNumber10_7
+    { kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange,  AV_PIX_FMT_NV12 },
+#endif
+};
+
+enum AVPixelFormat av_map_videotoolbox_format_to_pixfmt(uint32_t cv_fmt)
+{
+    int i;
+    for (i = 0; i < FF_ARRAY_ELEMS(cv_pix_fmts); i++) {
+        if (cv_pix_fmts[i].cv_fmt == cv_fmt)
+            return cv_pix_fmts[i].pix_fmt;
+    }
+    return AV_PIX_FMT_NONE;
+}
+
+uint32_t av_map_videotoolbox_format_from_pixfmt(enum AVPixelFormat pix_fmt)
+{
+    int i;
+    for (i = 0; i < FF_ARRAY_ELEMS(cv_pix_fmts); i++) {
+        if (cv_pix_fmts[i].pix_fmt == pix_fmt)
+            return cv_pix_fmts[i].cv_fmt;
+    }
+    return 0;
+}
+
+static int vt_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
+{
+    frame->buf[0] = av_buffer_pool_get(ctx->pool);
+    if (!frame->buf[0])
+        return AVERROR(ENOMEM);
+
+    frame->data[3] = frame->buf[0]->data;
+    frame->format  = AV_PIX_FMT_VIDEOTOOLBOX;
+    frame->width   = ctx->width;
+    frame->height  = ctx->height;
+
+    return 0;
+}
+
+static int vt_transfer_get_formats(AVHWFramesContext *ctx,
+                                   enum AVHWFrameTransferDirection dir,
+                                   enum AVPixelFormat **formats)
+{
+    enum AVPixelFormat *fmts = av_malloc_array(2, sizeof(*fmts));
+    if (!fmts)
+        return AVERROR(ENOMEM);
+
+    fmts[0] = ctx->sw_format;
+    fmts[1] = AV_PIX_FMT_NONE;
+
+    *formats = fmts;
+    return 0;
+}
+
+static void vt_unmap(AVHWFramesContext *ctx, HWMapDescriptor *hwmap)
+{
+    CVPixelBufferRef pixbuf = (CVPixelBufferRef)hwmap->source->data[3];
+
+    CVPixelBufferUnlockBaseAddress(pixbuf, (uintptr_t)hwmap->priv);
+}
+
+static int vt_map_frame(AVHWFramesContext *ctx, AVFrame *dst, const AVFrame *src,
+                        int flags)
+{
+    CVPixelBufferRef pixbuf = (CVPixelBufferRef)src->data[3];
+    OSType pixel_format = CVPixelBufferGetPixelFormatType(pixbuf);
+    CVReturn err;
+    uint32_t map_flags = 0;
+    int ret;
+    int i;
+    enum AVPixelFormat format;
+
+    format = av_map_videotoolbox_format_to_pixfmt(pixel_format);
+    if (dst->format != format) {
+        av_log(ctx, AV_LOG_ERROR, "Unsupported or mismatching pixel format: %s\n",
+               av_fourcc2str(pixel_format));
+        return AVERROR_UNKNOWN;
+    }
+
+    if (CVPixelBufferGetWidth(pixbuf) != ctx->width ||
+        CVPixelBufferGetHeight(pixbuf) != ctx->height) {
+        av_log(ctx, AV_LOG_ERROR, "Inconsistent frame dimensions.\n");
+        return AVERROR_UNKNOWN;
+    }
+
+    if (flags == AV_HWFRAME_MAP_READ)
+        map_flags = kCVPixelBufferLock_ReadOnly;
+
+    err = CVPixelBufferLockBaseAddress(pixbuf, map_flags);
+    if (err != kCVReturnSuccess) {
+        av_log(ctx, AV_LOG_ERROR, "Error locking the pixel buffer.\n");
+        return AVERROR_UNKNOWN;
+    }
+
+    if (CVPixelBufferIsPlanar(pixbuf)) {
+        int planes = CVPixelBufferGetPlaneCount(pixbuf);
+        for (i = 0; i < planes; i++) {
+            dst->data[i]     = CVPixelBufferGetBaseAddressOfPlane(pixbuf, i);
+            dst->linesize[i] = CVPixelBufferGetBytesPerRowOfPlane(pixbuf, i);
+        }
+    } else {
+        dst->data[0]     = CVPixelBufferGetBaseAddress(pixbuf);
+        dst->linesize[0] = CVPixelBufferGetBytesPerRow(pixbuf);
+    }
+
+    ret = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, vt_unmap,
+                                (void *)(uintptr_t)map_flags);
+    if (ret < 0)
+        goto unlock;
+
+    return 0;
+
+unlock:
+    CVPixelBufferUnlockBaseAddress(pixbuf, map_flags);
+    return ret;
+}
+
+static int vt_transfer_data_from(AVHWFramesContext *hwfc,
+                                 AVFrame *dst, const AVFrame *src)
+{
+    AVFrame *map;
+    int err;
+
+    if (dst->width > hwfc->width || dst->height > hwfc->height)
+        return AVERROR(EINVAL);
+
+    map = av_frame_alloc();
+    if (!map)
+        return AVERROR(ENOMEM);
+    map->format = dst->format;
+
+    err = vt_map_frame(hwfc, map, src, AV_HWFRAME_MAP_READ);
+    if (err)
+        goto fail;
+
+    map->width  = dst->width;
+    map->height = dst->height;
+
+    err = av_frame_copy(dst, map);
+    if (err)
+        goto fail;
+
+    err = 0;
+fail:
+    av_frame_free(&map);
+    return err;
+}
+
+static int vt_transfer_data_to(AVHWFramesContext *hwfc,
+                               AVFrame *dst, const AVFrame *src)
+{
+    AVFrame *map;
+    int err;
+
+    if (src->width > hwfc->width || src->height > hwfc->height)
+        return AVERROR(EINVAL);
+
+    map = av_frame_alloc();
+    if (!map)
+        return AVERROR(ENOMEM);
+    map->format = src->format;
+
+    err = vt_map_frame(hwfc, map, dst, AV_HWFRAME_MAP_WRITE | AV_HWFRAME_MAP_OVERWRITE);
+    if (err)
+        goto fail;
+
+    map->width  = src->width;
+    map->height = src->height;
+
+    err = av_frame_copy(map, src);
+    if (err)
+        goto fail;
+
+    err = 0;
+fail:
+    av_frame_free(&map);
+    return err;
+}
+
+static int vt_device_create(AVHWDeviceContext *ctx, const char *device,
+                            AVDictionary *opts, int flags)
+{
+    if (device && device[0]) {
+        av_log(ctx, AV_LOG_ERROR, "Device selection unsupported.\n");
+        return AVERROR_UNKNOWN;
+    }
+
+    return 0;
+}
+
+const HWContextType ff_hwcontext_type_videotoolbox = {
+    .type                 = AV_HWDEVICE_TYPE_VIDEOTOOLBOX,
+    .name                 = "videotoolbox",
+
+    .device_create        = vt_device_create,
+    .frames_get_buffer    = vt_get_buffer,
+    .transfer_get_formats = vt_transfer_get_formats,
+    .transfer_data_to     = vt_transfer_data_to,
+    .transfer_data_from   = vt_transfer_data_from,
+
+    .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_VIDEOTOOLBOX, AV_PIX_FMT_NONE },
+};
diff --git a/libavutil/hwcontext_videotoolbox.h b/libavutil/hwcontext_videotoolbox.h
new file mode 100644
index 0000000000..7b4de4b7e1
--- /dev/null
+++ b/libavutil/hwcontext_videotoolbox.h
@@ -0,0 +1,54 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_HWCONTEXT_VT_H
+#define AVUTIL_HWCONTEXT_VT_H
+
+#include <stdint.h>
+
+#include <VideoToolbox/VideoToolbox.h>
+
+#include "pixfmt.h"
+
+/**
+ * @file
+ * An API-specific header for AV_HWDEVICE_TYPE_VIDEOTOOLBOX.
+ *
+ * This API currently does not support frame allocation, as the raw VideoToolbox
+ * API does allocation, and FFmpeg itself never has the need to allocate frames.
+ *
+ * If the API user sets a custom pool, AVHWFramesContext.pool must return
+ * AVBufferRefs whose data pointer is a CVImageBufferRef or CVPixelBufferRef.
+ *
+ * Currently AVHWDeviceContext.hwctx and AVHWFramesContext.hwctx are always
+ * NULL.
+ */
+
+/**
+ * Convert a VideoToolbox (actually CoreVideo) format to AVPixelFormat.
+ * Returns AV_PIX_FMT_NONE if no known equivalent was found.
+ */
+enum AVPixelFormat av_map_videotoolbox_format_to_pixfmt(uint32_t cv_fmt);
+
+/**
+ * Convert an AVPixelFormat to a VideoToolbox (actually CoreVideo) format.
+ * Returns 0 if no known equivalent was found.
+ */
+uint32_t av_map_videotoolbox_format_from_pixfmt(enum AVPixelFormat pix_fmt);
+
+#endif /* AVUTIL_HWCONTEXT_VT_H */
diff --git a/libavutil/version.h b/libavutil/version.h
index 6762bf300a..fb61dcc666 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -80,7 +80,7 @@
 
 
 #define LIBAVUTIL_VERSION_MAJOR  55
-#define LIBAVUTIL_VERSION_MINOR  62
+#define LIBAVUTIL_VERSION_MINOR  63
 #define LIBAVUTIL_VERSION_MICRO 100
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \