[FFmpeg-devel] [PATCH 1/3] avcodec: v4l2_m2m: fix races around freeing data on close

Tue Jan 9 21:36:10 EET 2018

On Tue,  9 Jan 2018 18:06:11 +0100
Jorge Ramirez-Ortiz <jorge.ramirez.ortiz at gmail.com> wrote:

> From: Mark Thompson <sw at jkqxz.net>
> 
> Refcount all of the context information. This also fixes a potential
> segmentation fault when accessing freed memory  (buffer returned after
> the codec has been closed).
> 
> Tested-by: Jorge Ramirez-Ortiz <jorge.ramirez.ortiz at gmail.com>
> ---
>  libavcodec/v4l2_buffers.c | 32 ++++++++++------
>  libavcodec/v4l2_buffers.h |  6 +++
>  libavcodec/v4l2_m2m.c     | 93 +++++++++++++++++++++++++++++------------------
>  libavcodec/v4l2_m2m.h     | 35 ++++++++++++++----
>  libavcodec/v4l2_m2m_dec.c | 22 +++++++----
>  libavcodec/v4l2_m2m_enc.c | 22 +++++++----
>  6 files changed, 140 insertions(+), 70 deletions(-)
> 
> diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c
> index ba70c5d..4e68f90 100644
> --- a/libavcodec/v4l2_buffers.c
> +++ b/libavcodec/v4l2_buffers.c
> @@ -207,20 +207,17 @@ static void v4l2_free_buffer(void *opaque, uint8_t *unused)
>      V4L2Buffer* avbuf = opaque;
>      V4L2m2mContext *s = buf_to_m2mctx(avbuf);
>  
> -    atomic_fetch_sub_explicit(&s->refcount, 1, memory_order_acq_rel);
> -    if (s->reinit) {
> -        if (!atomic_load(&s->refcount))
> -            sem_post(&s->refsync);
> -        return;
> -    }
> +    if (atomic_fetch_sub(&avbuf->context_refcount, 1) == 1) {
> +        atomic_fetch_sub_explicit(&s->refcount, 1, memory_order_acq_rel);
>  
> -    if (avbuf->context->streamon) {
> -        ff_v4l2_buffer_enqueue(avbuf);
> -        return;
> -    }
> +        if (s->reinit) {
> +            if (!atomic_load(&s->refcount))
> +                sem_post(&s->refsync);
> +        } else if (avbuf->context->streamon)
> +            ff_v4l2_buffer_enqueue(avbuf);
>  
> -    if (!atomic_load(&s->refcount))
> -        ff_v4l2_m2m_codec_end(s->avctx);
> +        av_buffer_unref(&avbuf->context_ref);
> +    }
>  }
>  
>  static int v4l2_buf_to_bufref(V4L2Buffer *in, int plane, AVBufferRef **buf)
> @@ -236,6 +233,17 @@ static int v4l2_buf_to_bufref(V4L2Buffer *in, int plane, AVBufferRef **buf)
>      if (!*buf)
>          return AVERROR(ENOMEM);
>  
> +    if (in->context_ref)
> +        atomic_fetch_add(&in->context_refcount, 1);
> +    else {
> +        in->context_ref = av_buffer_ref(s->self_ref);
> +        if (!in->context_ref) {
> +            av_buffer_unref(buf);
> +            return AVERROR(ENOMEM);
> +        }
> +        in->context_refcount = 1;
> +    }
> +
>      in->status = V4L2BUF_RET_USER;
>      atomic_fetch_add_explicit(&s->refcount, 1, memory_order_relaxed);
>  
> diff --git a/libavcodec/v4l2_buffers.h b/libavcodec/v4l2_buffers.h
> index e28a4a6..dc5cc9e 100644
> --- a/libavcodec/v4l2_buffers.h
> +++ b/libavcodec/v4l2_buffers.h
> @@ -24,6 +24,7 @@
>  #ifndef AVCODEC_V4L2_BUFFERS_H
>  #define AVCODEC_V4L2_BUFFERS_H
>  
> +#include <stdatomic.h>
>  #include <linux/videodev2.h>
>  
>  #include "avcodec.h"
> @@ -41,6 +42,11 @@ typedef struct V4L2Buffer {
>      /* each buffer needs to have a reference to its context */
>      struct V4L2Context *context;
>  
> +    /* This object is refcounted per-plane, so we need to keep track
> +     * of how many context-refs we are holding. */
> +    AVBufferRef *context_ref;
> +    atomic_uint context_refcount;
> +
>      /* keep track of the mmap address and mmap length */
>      struct V4L2Plane_info {
>          int bytesperline;
> diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c
> index 1d7a852..fd989ce 100644
> --- a/libavcodec/v4l2_m2m.c
> +++ b/libavcodec/v4l2_m2m.c
> @@ -222,8 +222,6 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext* s)
>      }
>  
>      /* 5. complete reinit */
> -    sem_destroy(&s->refsync);
> -    sem_init(&s->refsync, 0, 0);
>      s->draining = 0;
>      s->reinit = 0;
>  
> @@ -241,24 +239,26 @@ int ff_v4l2_m2m_codec_full_reinit(V4L2m2mContext *s)
>      if (atomic_load(&s->refcount))
>          while(sem_wait(&s->refsync) == -1 && errno == EINTR);
>  
> -    /* close the driver */
> -    ff_v4l2_m2m_codec_end(s->avctx);
> +    ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMOFF);
> +    if (ret) {
> +        av_log(s->avctx, AV_LOG_ERROR, "output VIDIOC_STREAMOFF\n");
> +        goto error;
> +    }
> +
> +    ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF);
> +    if (ret) {
> +            av_log(s->avctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF\n");
> +            goto error;
> +    }
> +
> +    /* release and unmmap the buffers */
> +    ff_v4l2_context_release(&s->output);
> +    ff_v4l2_context_release(&s->capture);
>  
>      /* start again now that we know the stream dimensions */
>      s->draining = 0;
>      s->reinit = 0;
>  
> -    s->fd = open(s->devname, O_RDWR | O_NONBLOCK, 0);
> -    if (s->fd < 0)
> -        return AVERROR(errno);
> -
> -    ret = v4l2_prepare_contexts(s);
> -    if (ret < 0)
> -        goto error;
> -
> -    /* if a full re-init was requested - probe didn't run - we need to populate
> -     * the format for each context
> -     */
>      ret = ff_v4l2_context_get_format(&s->output);
>      if (ret) {
>          av_log(log_ctx, AV_LOG_DEBUG, "v4l2 output format not supported\n");
> @@ -301,19 +301,25 @@ int ff_v4l2_m2m_codec_full_reinit(V4L2m2mContext *s)
>      return 0;
>  
>  error:
> -    if (close(s->fd) < 0) {
> -        ret = AVERROR(errno);
> -        av_log(log_ctx, AV_LOG_ERROR, "error closing %s (%s)\n",
> -            s->devname, av_err2str(AVERROR(errno)));
> -    }
> -    s->fd = -1;
> -
>      return ret;
>  }
>  
> +static void v4l2_m2m_destroy_context(void *opaque, uint8_t *context)
> +{
> +    V4L2m2mContext *s = (V4L2m2mContext*)context;
> +
> +    ff_v4l2_context_release(&s->capture);
> +    sem_destroy(&s->refsync);
> +
> +    close(s->fd);
> +
> +    av_free(s);
> +}
> +
>  int ff_v4l2_m2m_codec_end(AVCodecContext *avctx)
>  {
> -    V4L2m2mContext* s = avctx->priv_data;
> +    V4L2m2mPriv *priv = avctx->priv_data;
> +    V4L2m2mContext* s = priv->context;
>      int ret;
>  
>      ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMOFF);
> @@ -326,17 +332,8 @@ int ff_v4l2_m2m_codec_end(AVCodecContext *avctx)
>  
>      ff_v4l2_context_release(&s->output);
>  
> -    if (atomic_load(&s->refcount))
> -        av_log(avctx, AV_LOG_ERROR, "ff_v4l2m2m_codec_end leaving pending buffers\n");
> -
> -    ff_v4l2_context_release(&s->capture);
> -    sem_destroy(&s->refsync);
> -
> -    /* release the hardware */
> -    if (close(s->fd) < 0 )
> -        av_log(avctx, AV_LOG_ERROR, "failure closing %s (%s)\n", s->devname, av_err2str(AVERROR(errno)));
> -
> -    s->fd = -1;
> +    s->self_ref = NULL;
> +    av_buffer_unref(&priv->context_ref);
>  
>      return 0;
>  }
> @@ -348,7 +345,7 @@ int ff_v4l2_m2m_codec_init(AVCodecContext *avctx)
>      char node[PATH_MAX];
>      DIR *dirp;
>  
> -    V4L2m2mContext *s = avctx->priv_data;
> +    V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
>      s->avctx = avctx;
>  
>      dirp = opendir("/dev");
> @@ -381,3 +378,29 @@ int ff_v4l2_m2m_codec_init(AVCodecContext *avctx)
>  
>      return v4l2_configure_contexts(s);
>  }
> +
> +int ff_v4l2_m2m_create_context(AVCodecContext *avctx, V4L2m2mContext **s)
> +{
> +    V4L2m2mPriv *priv = avctx->priv_data;
> +
> +    *s = av_mallocz(sizeof(V4L2m2mContext));
> +    if (!*s)
> +        return AVERROR(ENOMEM);
> +
> +    priv->context_ref = av_buffer_create((uint8_t *) *s, sizeof(V4L2m2mContext),
> +                                         &v4l2_m2m_destroy_context, NULL, 0);
> +    if (!priv->context_ref) {
> +        av_free(s);
> +        return AVERROR(ENOMEM);
> +    }
> +
> +    /* assign the context */
> +    priv->context = *s;
> +
> +    /* populate it */
> +    priv->context->capture.num_buffers = priv->num_capture_buffers;
> +    priv->context->output.num_buffers  = priv->num_output_buffers;
> +    priv->context->self_ref = priv->context_ref;
> +
> +    return 0;
> +}
> diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h
> index afa3987..452bf0d 100644
> --- a/libavcodec/v4l2_m2m.h
> +++ b/libavcodec/v4l2_m2m.h
> @@ -38,11 +38,9 @@
>  
>  #define V4L_M2M_DEFAULT_OPTS \
>      { "num_output_buffers", "Number of buffers in the output context",\
> -        OFFSET(output.num_buffers), AV_OPT_TYPE_INT, { .i64 = 16 }, 6, INT_MAX, FLAGS }
> +        OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 16 }, 6, INT_MAX, FLAGS }
>  
> -typedef struct V4L2m2mContext
> -{
> -    AVClass *class;
> +typedef struct V4L2m2mContext {
>      char devname[PATH_MAX];
>      int fd;
>  
> @@ -50,18 +48,41 @@ typedef struct V4L2m2mContext
>      V4L2Context capture;
>      V4L2Context output;
>  
> -    /* refcount of buffers held by the user */
> -    atomic_uint refcount;
> -
>      /* dynamic stream reconfig */
>      AVCodecContext *avctx;
>      sem_t refsync;
> +    atomic_uint refcount;
>      int reinit;
>  
>      /* null frame/packet received */
>      int draining;
> +
> +    /* Reference to self; only valid while codec is active. */
> +    AVBufferRef *self_ref;
>  } V4L2m2mContext;
>  
> +typedef struct V4L2m2mPriv
> +{
> +    AVClass *class;
> +
> +    V4L2m2mContext *context;
> +    AVBufferRef    *context_ref;
> +
> +    int num_output_buffers;
> +    int num_capture_buffers;
> +} V4L2m2mPriv;
> +
> +/**
> + * Allocate a new context and references for a V4L2 M2M instance.
> + *
> + * @param[in] ctx The AVCodecContext instantiated by the encoder/decoder.
> + * @param[out] ctx The V4L2m2mContext.
> + *
> + * @returns 0 in success, a negative error code otherwise.
> + */
> +int ff_v4l2_m2m_create_context(AVCodecContext *avctx, V4L2m2mContext **s);
> +
> +
>  /**
>   * Probes the video nodes looking for the required codec capabilities.
>   *
> diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
> index 8308613..bca45be 100644
> --- a/libavcodec/v4l2_m2m_dec.c
> +++ b/libavcodec/v4l2_m2m_dec.c
> @@ -35,7 +35,7 @@
>  
>  static int v4l2_try_start(AVCodecContext *avctx)
>  {
> -    V4L2m2mContext *s = avctx->priv_data;
> +    V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
>      V4L2Context *const capture = &s->capture;
>      V4L2Context *const output = &s->output;
>      struct v4l2_selection selection;
> @@ -127,7 +127,7 @@ static int v4l2_prepare_decoder(V4L2m2mContext *s)
>  
>  static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
>  {
> -    V4L2m2mContext *s = avctx->priv_data;
> +    V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
>      V4L2Context *const capture = &s->capture;
>      V4L2Context *const output = &s->output;
>      AVPacket avpkt = {0};
> @@ -159,11 +159,17 @@ dequeue:
>  
>  static av_cold int v4l2_decode_init(AVCodecContext *avctx)
>  {
> -    V4L2m2mContext *s = avctx->priv_data;
> -    V4L2Context *capture = &s->capture;
> -    V4L2Context *output = &s->output;
> +    V4L2Context *capture, *output;
> +    V4L2m2mContext *s;
>      int ret;
>  
> +    ret = ff_v4l2_m2m_create_context(avctx, &s);
> +    if (ret < 0)
> +        return ret;
> +
> +    capture = &s->capture;
> +    output = &s->output;
> +
>      /* if these dimensions are invalid (ie, 0 or too small) an event will be raised
>       * by the v4l2 driver; this event will trigger a full pipeline reconfig and
>       * the proper values will be retrieved from the kernel driver.
> @@ -186,13 +192,13 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
>      return v4l2_prepare_decoder(s);
>  }
>  
> -#define OFFSET(x) offsetof(V4L2m2mContext, x)
> +#define OFFSET(x) offsetof(V4L2m2mPriv, x)
>  #define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
>  
>  static const AVOption options[] = {
>      V4L_M2M_DEFAULT_OPTS,
>      { "num_capture_buffers", "Number of buffers in the capture context",
> -        OFFSET(capture.num_buffers), AV_OPT_TYPE_INT, {.i64 = 20}, 20, INT_MAX, FLAGS },
> +        OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 20}, 20, INT_MAX, FLAGS },
>      { NULL},
>  };
>  
> @@ -209,7 +215,7 @@ AVCodec ff_ ## NAME ## _v4l2m2m_decoder = { \
>      .long_name      = NULL_IF_CONFIG_SMALL("V4L2 mem2mem " LONGNAME " decoder wrapper"),\
>      .type           = AVMEDIA_TYPE_VIDEO,\
>      .id             = CODEC ,\
> -    .priv_data_size = sizeof(V4L2m2mContext),\
> +    .priv_data_size = sizeof(V4L2m2mPriv),\
>      .priv_class     = &v4l2_m2m_ ## NAME ## _dec_class,\
>      .init           = v4l2_decode_init,\
>      .receive_frame  = v4l2_receive_frame,\
> diff --git a/libavcodec/v4l2_m2m_enc.c b/libavcodec/v4l2_m2m_enc.c
> index 7e88f4d..4c9ea1f 100644
> --- a/libavcodec/v4l2_m2m_enc.c
> +++ b/libavcodec/v4l2_m2m_enc.c
> @@ -242,7 +242,7 @@ static int v4l2_prepare_encoder(V4L2m2mContext *s)
>  
>  static int v4l2_send_frame(AVCodecContext *avctx, const AVFrame *frame)
>  {
> -    V4L2m2mContext *s = avctx->priv_data;
> +    V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
>      V4L2Context *const output = &s->output;
>  
>      return ff_v4l2_context_enqueue_frame(output, frame);
> @@ -250,7 +250,7 @@ static int v4l2_send_frame(AVCodecContext *avctx, const AVFrame *frame)
>  
>  static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
>  {
> -    V4L2m2mContext *s = avctx->priv_data;
> +    V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
>      V4L2Context *const capture = &s->capture;
>      V4L2Context *const output = &s->output;
>      int ret;
> @@ -280,11 +280,17 @@ dequeue:
>  
>  static av_cold int v4l2_encode_init(AVCodecContext *avctx)
>  {
> -    V4L2m2mContext *s = avctx->priv_data;
> -    V4L2Context *capture = &s->capture;
> -    V4L2Context *output = &s->output;
> +    V4L2Context *capture, *output;
> +    V4L2m2mContext *s;
>      int ret;
>  
> +    ret = ff_v4l2_m2m_create_context(avctx, &s);
> +    if (ret < 0)
> +        return ret;
> +
> +    capture = &s->capture;
> +    output  = &s->output;
> +
>      /* common settings output/capture */
>      output->height = capture->height = avctx->height;
>      output->width = capture->width = avctx->width;
> @@ -306,13 +312,13 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx)
>      return v4l2_prepare_encoder(s);
>  }
>  
> -#define OFFSET(x) offsetof(V4L2m2mContext, x)
> +#define OFFSET(x) offsetof(V4L2m2mPriv, x)
>  #define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
>  
>  static const AVOption options[] = {
>      V4L_M2M_DEFAULT_OPTS,
>      { "num_capture_buffers", "Number of buffers in the capture context",
> -        OFFSET(capture.num_buffers), AV_OPT_TYPE_INT, {.i64 = 4 }, 4, INT_MAX, FLAGS },
> +        OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 4 }, 4, INT_MAX, FLAGS },
>      { NULL },
>  };
>  
> @@ -329,7 +335,7 @@ AVCodec ff_ ## NAME ## _v4l2m2m_encoder = { \
>      .long_name      = NULL_IF_CONFIG_SMALL("V4L2 mem2mem " LONGNAME " encoder wrapper"),\
>      .type           = AVMEDIA_TYPE_VIDEO,\
>      .id             = CODEC ,\
> -    .priv_data_size = sizeof(V4L2m2mContext),\
> +    .priv_data_size = sizeof(V4L2m2mPriv),\
>      .priv_class     = &v4l2_m2m_ ## NAME ##_enc_class,\
>      .init           = v4l2_encode_init,\
>      .send_frame     = v4l2_send_frame,\

Yes, that seems much better conceptually. Though I wonder why there's
still an atomic refcount left.

Also, can you send mail only to the mailing list? You CC a bunch of
people and have yourself as part of the "To:" header, so I have to
change the list of people I reply to every time since my dumb mail
client can't figure it out itself. Not sure if others are
inconvenienced by this as well - if not, feel free to ignore this
request.