[FFmpeg-cvslog] avcodec/videotoolbox: fix decoding of some HEVC videos

Fri May 18 22:55:20 EEST 2018

ffmpeg | branch: master | Aman Gupta <aman at tmm1.net> | Fri May  4 15:41:30 2018 -0700| [8f146b526ff8d63adc02e1c5db15850f4589230b] | committer: Aman Gupta

avcodec/videotoolbox: fix decoding of some HEVC videos

In a normal hwaccel, the AVHWFramesContext sets AVFrame.hw_frames_ctx
when it initializes a new AVFrame in av_hwframe_get_buffer().

But the VT hwaccel doesn't know what hw_frames_ctx to assign when
the AVFrame is first created, because it depends on the format of
the pixbuf that the decoder eventually decides to return. Thus
newly created AVFrames always have a NULL hw_frames_ctx, and the
hwaccel would only assign the ctx once a frame was done decoding.
This worked fine with the H264 decoder, but with the HEVC decoder
the frame's data may be moved to another empty AVFrame. Since the
empty AVFrame never had hw_frames_ctx set, a frame with a NULL
ctx could be returned to the API user.

This patch works around the issue by moving the derived
hw_frames_ctx from the AVFrame to a new VTHWFrame struct, which now
holds both the CVPixelBufferRef and the hw_frames_ctx AVBufferRef.
The hw_frames_ctx is only copied to the AVFrame right before it is
about to be returned to the user in videotoolbox_postproc_frame()
(since in the case of VT, the hw_frames_ctx is only there for the
API user anyway).

Fixes playback on macOS and iOS of some HEVC videos, such as
https://s3.amazonaws.com/tmm1/videotoolbox/germany-hevc-zdf.ts

Signed-off-by: Aman Gupta <aman at tmm1.net>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8f146b526ff8d63adc02e1c5db15850f4589230b
---

 libavcodec/videotoolbox.c | 67 +++++++++++++++++++++++++----------------------
 1 file changed, 35 insertions(+), 32 deletions(-)

diff --git a/libavcodec/videotoolbox.c b/libavcodec/videotoolbox.c
index fe5c9004b4..ac45e23c16 100644
--- a/libavcodec/videotoolbox.c
+++ b/libavcodec/videotoolbox.c
@@ -46,10 +46,16 @@ enum { kCMVideoCodecType_HEVC = 'hvc1' };
 
 #define VIDEOTOOLBOX_ESDS_EXTRADATA_PADDING  12
 
+typedef struct VTHWFrame {
+    CVPixelBufferRef pixbuf;
+    AVBufferRef *hw_frames_ctx;
+} VTHWFrame;
+
 static void videotoolbox_buffer_release(void *opaque, uint8_t *data)
 {
-    CVPixelBufferRef cv_buffer = *(CVPixelBufferRef *)data;
-    CVPixelBufferRelease(cv_buffer);
+    VTHWFrame *ref = (VTHWFrame *)data;
+    av_buffer_unref(&ref->hw_frames_ctx);
+    CVPixelBufferRelease(ref->pixbuf);
 
     av_free(data);
 }
@@ -76,22 +82,29 @@ static int videotoolbox_buffer_copy(VTContext *vtctx,
 
 static int videotoolbox_postproc_frame(void *avctx, AVFrame *frame)
 {
-    CVPixelBufferRef ref = *(CVPixelBufferRef *)frame->buf[0]->data;
+    VTHWFrame *ref = (VTHWFrame *)frame->buf[0]->data;
 
-    if (!ref) {
+    if (!ref->pixbuf) {
         av_log(avctx, AV_LOG_ERROR, "No frame decoded?\n");
         av_frame_unref(frame);
         return AVERROR_EXTERNAL;
     }
 
-    frame->data[3] = (uint8_t*)ref;
+    frame->data[3] = (uint8_t*)ref->pixbuf;
+
+    if (ref->hw_frames_ctx) {
+        av_buffer_unref(&frame->hw_frames_ctx);
+        frame->hw_frames_ctx = av_buffer_ref(ref->hw_frames_ctx);
+        if (!frame->hw_frames_ctx)
+            return AVERROR(ENOMEM);
+    }
 
     return 0;
 }
 
 int ff_videotoolbox_alloc_frame(AVCodecContext *avctx, AVFrame *frame)
 {
-    size_t      size = sizeof(CVPixelBufferRef);
+    size_t      size = sizeof(VTHWFrame);
     uint8_t    *data = NULL;
     AVBufferRef *buf = NULL;
     int ret = ff_attach_decode_data(frame);
@@ -318,26 +331,6 @@ CFDataRef ff_videotoolbox_hvcc_extradata_create(AVCodecContext *avctx)
     return data;
 }
 
-static int videotoolbox_set_frame(AVCodecContext *avctx, AVFrame *frame)
-{
-    VTContext *vtctx = avctx->internal->hwaccel_priv_data;
-    if (!frame->buf[0] || frame->data[3]) {
-        av_log(avctx, AV_LOG_ERROR, "videotoolbox: invalid state\n");
-        av_frame_unref(frame);
-        return AVERROR_EXTERNAL;
-    }
-
-    CVPixelBufferRef *ref = (CVPixelBufferRef *)frame->buf[0]->data;
-
-    if (*ref)
-        CVPixelBufferRelease(*ref);
-
-    *ref = vtctx->frame;
-    vtctx->frame = NULL;
-
-    return 0;
-}
-
 int ff_videotoolbox_h264_start_frame(AVCodecContext *avctx,
                                      const uint8_t *buffer,
                                      uint32_t size)
@@ -446,11 +439,21 @@ static int videotoolbox_buffer_create(AVCodecContext *avctx, AVFrame *frame)
     int width = CVPixelBufferGetWidth(pixbuf);
     int height = CVPixelBufferGetHeight(pixbuf);
     AVHWFramesContext *cached_frames;
+    VTHWFrame *ref;
     int ret;
 
-    ret = videotoolbox_set_frame(avctx, frame);
-    if (ret < 0)
-        return ret;
+    if (!frame->buf[0] || frame->data[3]) {
+        av_log(avctx, AV_LOG_ERROR, "videotoolbox: invalid state\n");
+        av_frame_unref(frame);
+        return AVERROR_EXTERNAL;
+    }
+
+    ref = (VTHWFrame *)frame->buf[0]->data;
+
+    if (ref->pixbuf)
+        CVPixelBufferRelease(ref->pixbuf);
+    ref->pixbuf = vtctx->frame;
+    vtctx->frame = NULL;
 
     // Old API code path.
     if (!vtctx->cached_hw_frames_ctx)
@@ -482,9 +485,9 @@ static int videotoolbox_buffer_create(AVCodecContext *avctx, AVFrame *frame)
         vtctx->cached_hw_frames_ctx = hw_frames_ctx;
     }
 
-    av_buffer_unref(&frame->hw_frames_ctx);
-    frame->hw_frames_ctx = av_buffer_ref(vtctx->cached_hw_frames_ctx);
-    if (!frame->hw_frames_ctx)
+    av_buffer_unref(&ref->hw_frames_ctx);
+    ref->hw_frames_ctx = av_buffer_ref(vtctx->cached_hw_frames_ctx);
+    if (!ref->hw_frames_ctx)
         return AVERROR(ENOMEM);
 
     return 0;