[FFmpeg-devel] [PATCH] avcodec: export motion vectors in frame side data on demand

Clément Bœsch u at pkh.me
Mon Aug 11 15:22:59 CEST 2014


From: Clément Bœsch <clement at stupeflix.com>

The reasoning behind this addition is that various third party
applications are interested in getting some motion information out of a
video "for free" when it is available.

Exporting other information as well (such as the intra information about
the block, or the quantization) was considered, but the structure would
likely have ended up half generic and half full of codec-specific
cruft. If more information is necessary, it should either be
added in the "flags" field of the AVExportedMV structure, or in another
side-data.

This commit also includes an example exporting them in a CSV stream.
---
TODO: avcodec version bump & APIChanges entry
---
 .gitignore                 |   1 +
 configure                  |   2 +
 doc/Makefile               |   1 +
 doc/codecs.texi            |   3 +
 doc/examples/Makefile      |   1 +
 doc/examples/extract_mvs.c | 185 +++++++++++++++++++++++++++++++++++++++++++++
 libavcodec/avcodec.h       |   1 +
 libavcodec/mpegvideo.c     | 102 ++++++++++++++++++++++++-
 libavcodec/options_table.h |   1 +
 libavutil/frame.h          |   6 ++
 libavutil/mvinfo.h         |  49 ++++++++++++
 11 files changed, 351 insertions(+), 1 deletion(-)
 create mode 100644 doc/examples/extract_mvs.c
 create mode 100644 libavutil/mvinfo.h

diff --git a/.gitignore b/.gitignore
index cb370bb..480fbe0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -39,6 +39,7 @@
 /doc/examples/avio_reading
 /doc/examples/decoding_encoding
 /doc/examples/demuxing_decoding
+/doc/examples/extract_mvs
 /doc/examples/filter_audio
 /doc/examples/filtering_audio
 /doc/examples/filtering_video
diff --git a/configure b/configure
index 0ac6132..a93fc06 100755
--- a/configure
+++ b/configure
@@ -1306,6 +1306,7 @@ EXAMPLE_LIST="
     avio_reading_example
     decoding_encoding_example
     demuxing_decoding_example
+    extract_mvs_example
     filter_audio_example
     filtering_audio_example
     filtering_video_example
@@ -2586,6 +2587,7 @@ zoompan_filter_deps="swscale"
 avio_reading="avformat avcodec avutil"
 avcodec_example_deps="avcodec avutil"
 demuxing_decoding_example_deps="avcodec avformat avutil"
+extract_mvs_example_deps="avcodec avformat avutil"
 filter_audio_example_deps="avfilter avutil"
 filtering_audio_example_deps="avfilter avcodec avformat avutil"
 filtering_video_example_deps="avfilter avcodec avformat avutil"
diff --git a/doc/Makefile b/doc/Makefile
index 99f588a..2fb9058 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -39,6 +39,7 @@ DOCS = $(DOCS-yes)
 DOC_EXAMPLES-$(CONFIG_AVIO_READING_EXAMPLE)      += avio_reading
 DOC_EXAMPLES-$(CONFIG_AVCODEC_EXAMPLE)           += avcodec
 DOC_EXAMPLES-$(CONFIG_DEMUXING_DECODING_EXAMPLE) += demuxing_decoding
+DOC_EXAMPLES-$(CONFIG_EXTRACT_MVS_EXAMPLE)       += extract_mvs
 DOC_EXAMPLES-$(CONFIG_FILTER_AUDIO_EXAMPLE)      += filter_audio
 DOC_EXAMPLES-$(CONFIG_FILTERING_AUDIO_EXAMPLE)   += filtering_audio
 DOC_EXAMPLES-$(CONFIG_FILTERING_VIDEO_EXAMPLE)   += filtering_video
diff --git a/doc/codecs.texi b/doc/codecs.texi
index 1160e5d..7aaa229 100644
--- a/doc/codecs.texi
+++ b/doc/codecs.texi
@@ -797,6 +797,9 @@ Frame data might be split into multiple chunks.
 Show all frames before the first keyframe.
 @item skiprd
 Deprecated, use mpegvideo private options instead.
+@item export_mvs
+Export motion vectors into frame side-data (see @code{AV_FRAME_DATA_MV_INFO})
+for codecs that support it. See also @file{doc/examples/extract_mvs.c}.
 @end table
 
 @item error @var{integer} (@emph{encoding,video})
diff --git a/doc/examples/Makefile b/doc/examples/Makefile
index 03c7021..07251fe 100644
--- a/doc/examples/Makefile
+++ b/doc/examples/Makefile
@@ -14,6 +14,7 @@ LDLIBS := $(shell pkg-config --libs $(FFMPEG_LIBS)) $(LDLIBS)
 EXAMPLES=       avio_reading                       \
                 decoding_encoding                  \
                 demuxing_decoding                  \
+                extract_mvs                        \
                 filtering_video                    \
                 filtering_audio                    \
                 metadata                           \
diff --git a/doc/examples/extract_mvs.c b/doc/examples/extract_mvs.c
new file mode 100644
index 0000000..69f76cd
--- /dev/null
+++ b/doc/examples/extract_mvs.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2012 Stefano Sabatini
+ * Copyright (c) 2014 Clément Bœsch
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <libavutil/mvinfo.h>
+#include <libavformat/avformat.h>
+
+static AVFormatContext *fmt_ctx = NULL;
+static AVCodecContext *video_dec_ctx = NULL;
+static AVStream *video_stream = NULL;
+static const char *src_filename = NULL;
+
+static int video_stream_idx = -1;
+static AVFrame *frame = NULL;
+static AVPacket pkt;
+static int video_frame_count = 0;
+
+static int decode_packet(int *got_frame, int cached)
+{
+    /* Decode the global pkt and print one CSV row per exported MV; returns
+     * pkt.size, or the negative error from avcodec_decode_video2(). */
+    int decoded = pkt.size;
+    *got_frame = 0;
+    if (pkt.stream_index == video_stream_idx) {
+        int ret = avcodec_decode_video2(video_dec_ctx, frame, got_frame, &pkt);
+        if (ret < 0) {
+            fprintf(stderr, "Error decoding video frame (%s)\n", av_err2str(ret));
+            return ret;
+        }
+
+        if (*got_frame) {
+            int i;
+            AVFrameSideData *sd;
+
+            video_frame_count++;
+            sd = av_frame_get_side_data(frame, AV_FRAME_DATA_MV_INFO);
+            if (sd) {
+                const AVExportedMV *mvs = (const AVExportedMV *)sd->data;
+                for (i = 0; i < sd->size / sizeof(*mvs); i++) {
+                    const AVExportedMV *mv = &mvs[i];
+                    /* flags is uint64_t: plain %x would be a type mismatch */
+                    printf("%d,%2d,%2d,%2d,%4d,%4d,%4d,%4d,0x%016"PRIx64"\n",
+                           video_frame_count, mv->source,
+                           mv->w, mv->h, mv->src_x, mv->src_y,
+                           mv->dst_x, mv->dst_y, mv->flags);
+                }
+            }
+        }
+    }
+    return decoded;
+}
+
+static int open_codec_context(int *stream_idx,
+                              AVFormatContext *fmt_ctx, enum AVMediaType type)
+{
+    /* Find the best `type` stream, store its index in *stream_idx and open its
+     * decoder with export_mvs enabled; returns 0 or a negative error code. */
+    int ret;
+    AVCodecContext *dec_ctx;
+    AVCodec *dec;
+    AVDictionary *opts = NULL;
+
+    ret = av_find_best_stream(fmt_ctx, type, -1, -1, NULL, 0);
+    if (ret < 0) {
+        fprintf(stderr, "Could not find %s stream in input file '%s'\n",
+                av_get_media_type_string(type), src_filename);
+        return ret;
+    }
+    *stream_idx = ret;
+
+    /* find decoder for the stream */
+    dec_ctx = fmt_ctx->streams[*stream_idx]->codec;
+    dec = avcodec_find_decoder(dec_ctx->codec_id);
+    if (!dec) {
+        fprintf(stderr, "Failed to find %s codec\n",
+                av_get_media_type_string(type));
+        return AVERROR(EINVAL);
+    }
+
+    /* Init the video decoder; opts stays ours to free whatever the outcome */
+    av_dict_set(&opts, "flags2", "+export_mvs", 0);
+    ret = avcodec_open2(dec_ctx, dec, &opts);
+    av_dict_free(&opts);
+    if (ret < 0) {
+        fprintf(stderr, "Failed to open %s codec\n",
+                av_get_media_type_string(type));
+        return ret;
+    }
+    return 0;
+}
+
+int main(int argc, char **argv)
+{
+    /* Print the motion vectors exported for each video frame of argv[1] as CSV. */
+    int ret = 0, got_frame;
+
+    if (argc != 2) {
+        fprintf(stderr, "Usage: %s <video>\n", argv[0]);
+        exit(1);
+    }
+    src_filename = argv[1];
+    av_register_all();
+    if (avformat_open_input(&fmt_ctx, src_filename, NULL, NULL) < 0) {
+        fprintf(stderr, "Could not open source file %s\n", src_filename);
+        exit(1);
+    }
+    if (avformat_find_stream_info(fmt_ctx, NULL) < 0) {
+        fprintf(stderr, "Could not find stream information\n");
+        exit(1);
+    }
+
+    if (open_codec_context(&video_stream_idx, fmt_ctx, AVMEDIA_TYPE_VIDEO) >= 0) {
+        video_stream = fmt_ctx->streams[video_stream_idx];
+        video_dec_ctx = video_stream->codec;
+    }
+    av_dump_format(fmt_ctx, 0, src_filename, 0);
+
+    if (!video_stream) {
+        fprintf(stderr, "Could not find video stream in the input, aborting\n");
+        /* must be negative: the exit status below is computed as ret < 0 */
+        ret = AVERROR_STREAM_NOT_FOUND;
+        goto end;
+    }
+
+    frame = av_frame_alloc();
+    if (!frame) {
+        fprintf(stderr, "Could not allocate frame\n");
+        ret = AVERROR(ENOMEM);
+        goto end;
+    }
+
+    printf("framenum,source,blockw,blockh,srcx,srcy,dstx,dsty,flags\n");
+
+    /* initialize packet, set data to NULL, let the demuxer fill it */
+    av_init_packet(&pkt);
+    pkt.data = NULL;
+    pkt.size = 0;
+
+    /* read frames from the file */
+    while (av_read_frame(fmt_ctx, &pkt) >= 0) {
+        AVPacket orig_pkt = pkt;
+        do {
+            ret = decode_packet(&got_frame, 0);
+            if (ret < 0)
+                break;
+            pkt.data += ret;
+            pkt.size -= ret;
+        } while (pkt.size > 0);
+        av_free_packet(&orig_pkt);
+    }
+
+    /* flush cached frames; decode_packet() only decodes when pkt.stream_index
+     * matches the video stream, so set it rather than rely on a stale value */
+    pkt.data = NULL;
+    pkt.size = 0;
+    pkt.stream_index = video_stream_idx;
+    do {
+        decode_packet(&got_frame, 1);
+    } while (got_frame);
+
+end:
+    avcodec_close(video_dec_ctx);
+    avformat_close_input(&fmt_ctx);
+    av_frame_free(&frame);
+    return ret < 0;
+}
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 3b6a750..efe3dbd 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -767,6 +767,7 @@ typedef struct RcOverride{
 
 #define CODEC_FLAG2_CHUNKS        0x00008000 ///< Input bitstream might be truncated at a packet boundaries instead of only at frame boundaries.
 #define CODEC_FLAG2_SHOW_ALL      0x00400000 ///< Show all frames before the first keyframe
+#define CODEC_FLAG2_EXPORT_MVS    0x10000000 ///< Export motion vectors through frame side data
 
 /* Unsupported options :
  *              Syntax Arithmetic coding (SAC)
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 4672359..fe11089 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -31,6 +31,7 @@
 #include "libavutil/avassert.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/internal.h"
+#include "libavutil/mvinfo.h"
 #include "libavutil/timer.h"
 #include "avcodec.h"
 #include "blockdsp.h"
@@ -596,7 +597,8 @@ static int alloc_picture_tables(MpegEncContext *s, Picture *pic)
             return AVERROR(ENOMEM);
     }
 
-    if (s->out_format == FMT_H263 || s->encoding || s->avctx->debug_mv) {
+    if (s->out_format == FMT_H263 || s->encoding || s->avctx->debug_mv ||
+        (s->avctx->flags2 & CODEC_FLAG2_EXPORT_MVS)) {
         int mv_size        = 2 * (b8_array_size + 4) * sizeof(int16_t);
         int ref_index_size = 4 * mb_array_size;
 
@@ -2104,6 +2106,24 @@ static void draw_arrow(uint8_t *buf, int sx, int sy, int ex,
     draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
 }
 
+static int add_mb(AVExportedMV *mb, uint32_t mb_type,
+                  int dst_x, int dst_y,
+                  int src_x, int src_y,
+                  int direction)
+{
+    /* Record one motion vector in *mb; returns 1 if stored, 0 if src == dst. */
+    const int moved = dst_x != src_x || dst_y != src_y;
+    if (moved) {
+        mb->source = direction ? 1 : -1;
+        mb->w      = (IS_8X8(mb_type) || IS_8X16(mb_type)) ? 8 : 16;
+        mb->h      = (IS_8X8(mb_type) || IS_16X8(mb_type)) ? 8 : 16;
+        mb->src_x  = src_x; mb->src_y = src_y;
+        mb->dst_x  = dst_x; mb->dst_y = dst_y;
+        mb->flags  = 0; // XXX: does mb_type contain extra information that could be exported here?
+    }
+    return moved;
+}
+
 /**
  * Print debugging info for the given picture.
  */
@@ -2112,6 +2132,86 @@ void ff_print_debug_info2(AVCodecContext *avctx, AVFrame *pict, uint8_t *mbskip_
                          int *low_delay,
                          int mb_width, int mb_height, int mb_stride, int quarter_sample)
 {
+    if ((avctx->flags2 & CODEC_FLAG2_EXPORT_MVS) && mbtype_table && motion_val[0]) {
+        const int shift = 1 + quarter_sample;
+        const int mv_sample_log2 = avctx->codec_id == AV_CODEC_ID_H264 || avctx->codec_id == AV_CODEC_ID_SVQ3 ? 2 : 1;
+        const int mv_stride      = (mb_width << mv_sample_log2) +
+                                   (avctx->codec->id == AV_CODEC_ID_H264 ? 0 : 1);
+        int mb_x, mb_y, mbcount = 0;
+
+        /* width * height * directions * 4MB (4MB for IS_8x8) */
+        AVExportedMV *mvs = av_malloc_array(mb_width * mb_height, 2 * 4 * sizeof(AVExportedMV));
+        if (!mvs)
+            return;
+
+        for (mb_y = 0; mb_y < mb_height; mb_y++) {
+            for (mb_x = 0; mb_x < mb_width; mb_x++) {
+                int i, direction, mb_type = mbtype_table[mb_x + mb_y * mb_stride];
+                for (direction = 0; direction < 2; direction++) {
+                    if (!USES_LIST(mb_type, direction))
+                        continue;
+                    if (IS_8X8(mb_type)) {
+                        for (i = 0; i < 4; i++) {
+                            int sx = mb_x * 16 + 4 + 8 * (i & 1);
+                            int sy = mb_y * 16 + 4 + 8 * (i >> 1);
+                            int xy = (mb_x * 2 + (i & 1) +
+                                      (mb_y * 2 + (i >> 1)) * mv_stride) << (mv_sample_log2 - 1);
+                            int mx = (motion_val[direction][xy][0] >> shift) + sx;
+                            int my = (motion_val[direction][xy][1] >> shift) + sy;
+                            mbcount += add_mb(mvs + mbcount, mb_type, sx, sy, mx, my, direction);
+                        }
+                    } else if (IS_16X8(mb_type)) {
+                        for (i = 0; i < 2; i++) {
+                            int sx = mb_x * 16 + 8;
+                            int sy = mb_y * 16 + 4 + 8 * i;
+                            int xy = (mb_x * 2 + (mb_y * 2 + i) * mv_stride) << (mv_sample_log2 - 1);
+                            int mx = (motion_val[direction][xy][0] >> shift);
+                            int my = (motion_val[direction][xy][1] >> shift);
+
+                            if (IS_INTERLACED(mb_type))
+                                my *= 2;
+
+                            mbcount += add_mb(mvs + mbcount, mb_type, sx, sy, mx + sx, my + sy, direction);
+                        }
+                    } else if (IS_8X16(mb_type)) {
+                        for (i = 0; i < 2; i++) {
+                            int sx = mb_x * 16 + 4 + 8 * i;
+                            int sy = mb_y * 16 + 8;
+                            int xy = (mb_x * 2 + i + mb_y * 2 * mv_stride) << (mv_sample_log2 - 1);
+                            int mx = motion_val[direction][xy][0] >> shift;
+                            int my = motion_val[direction][xy][1] >> shift;
+
+                            if (IS_INTERLACED(mb_type))
+                                my *= 2;
+
+                            mbcount += add_mb(mvs + mbcount, mb_type, sx, sy, mx + sx, my + sy, direction);
+                        }
+                    } else {
+                          int sx = mb_x * 16 + 8;
+                          int sy = mb_y * 16 + 8;
+                          int xy = (mb_x + mb_y * mv_stride) << mv_sample_log2;
+                          int mx = (motion_val[direction][xy][0]>>shift) + sx;
+                          int my = (motion_val[direction][xy][1]>>shift) + sy;
+                          mbcount += add_mb(mvs + mbcount, mb_type, sx, sy, mx, my, direction);
+                    }
+                }
+            }
+        }
+
+        if (mbcount) {
+            AVFrameSideData *sd;
+
+            av_log(avctx, AV_LOG_DEBUG, "Adding %d MVs info to frame %d\n", mbcount, avctx->frame_number);
+            sd = av_frame_new_side_data(pict, AV_FRAME_DATA_MV_INFO, mbcount * sizeof(AVExportedMV));
+            if (!sd)
+                return;
+            memcpy(sd->data, mvs, mbcount * sizeof(AVExportedMV));
+        }
+
+        av_freep(&mvs);
+    }
+
+    /* TODO: export all the following to make them accessible for users (and filters) */
     if (avctx->hwaccel || !mbtype_table
         || (avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU))
         return;
diff --git a/libavcodec/options_table.h b/libavcodec/options_table.h
index 2e9dfa0..7000531 100644
--- a/libavcodec/options_table.h
+++ b/libavcodec/options_table.h
@@ -88,6 +88,7 @@ static const AVOption avcodec_options[] = {
 {"local_header", "place global headers at every keyframe instead of in extradata", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_LOCAL_HEADER }, INT_MIN, INT_MAX, V|E, "flags2"},
 {"chunks", "Frame data might be split into multiple chunks", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_CHUNKS }, INT_MIN, INT_MAX, V|D, "flags2"},
 {"showall", "Show all frames before the first keyframe", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_SHOW_ALL }, INT_MIN, INT_MAX, V|D, "flags2"},
+{"export_mvs", "export motion vectors through frame side data", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_EXPORT_MVS}, INT_MIN, INT_MAX, V|D, "flags2"},
 {"me_method", "set motion estimation method", OFFSET(me_method), AV_OPT_TYPE_INT, {.i64 = ME_EPZS }, INT_MIN, INT_MAX, V|E, "me_method"},
 {"zero", "zero motion estimation (fastest)", 0, AV_OPT_TYPE_CONST, {.i64 = ME_ZERO }, INT_MIN, INT_MAX, V|E, "me_method" },
 {"full", "full motion estimation (slowest)", 0, AV_OPT_TYPE_CONST, {.i64 = ME_FULL }, INT_MIN, INT_MAX, V|E, "me_method" },
diff --git a/libavutil/frame.h b/libavutil/frame.h
index dbbdd29..bd3be03 100644
--- a/libavutil/frame.h
+++ b/libavutil/frame.h
@@ -87,6 +87,12 @@ enum AVFrameSideDataType {
      * in ETSI TS 101 154 using AVActiveFormatDescription enum.
      */
     AV_FRAME_DATA_AFD,
+    /**
+     * Motion vectors exported by some codecs (on demand through
+     * -flags2 export_mvs).
+     * The data is an array of AVExportedMV structs (see libavutil/mvinfo.h).
+     */
+    AV_FRAME_DATA_MV_INFO,
 };
 
 enum AVActiveFormatDescription {
diff --git a/libavutil/mvinfo.h b/libavutil/mvinfo.h
new file mode 100644
index 0000000..735f1b9
--- /dev/null
+++ b/libavutil/mvinfo.h
@@ -0,0 +1,49 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_MVINFO_H
+#define AVUTIL_MVINFO_H
+
+#include <stdint.h>
+
+typedef struct AVExportedMV {
+    /**
+     * Where the current block comes from: negative = past, positive = future.
+     * XXX: set exact relative ref frame reference instead of a +/- 1 "direction".
+     */
+    int32_t source;
+    /**
+     * Width and height of the block.
+     */
+    uint8_t w, h;
+    /**
+     * Absolute source position. Signed: it may lie outside the frame.
+     */
+    int16_t src_x, src_y;
+    /**
+     * Absolute destination position.
+     */
+    uint16_t dst_x, dst_y;
+    /**
+     * Extra flag information.
+     * Currently unused.
+     */
+    uint64_t flags;
+} AVExportedMV;
+
+#endif /* AVUTIL_MVINFO_H */
-- 
2.0.4



More information about the ffmpeg-devel mailing list