[FFmpeg-devel] [PATCH v4 1/2] lavc, doc, configure: add libxavs2 video encoder wrapper

Wed Sep 5 03:46:18 EEST 2018

On 03/09/18 03:42, hwren wrote:
> Signed-off-by: hwren <hwrenx at 126.com>
> ---
>  Changelog              |   1 +
>  configure              |   4 +
>  doc/encoders.texi      |  40 +++++++
>  doc/general.texi       |  14 +++
>  libavcodec/Makefile    |   1 +
>  libavcodec/allcodecs.c |   1 +
>  libavcodec/libxavs2.c  | 319 +++++++++++++++++++++++++++++++++++++++++++++++++
>  libavcodec/version.h   |   2 +-
>  8 files changed, 381 insertions(+), 1 deletion(-)
>  create mode 100644 libavcodec/libxavs2.c
> 
> ...
> diff --git a/libavcodec/libxavs2.c b/libavcodec/libxavs2.c
> new file mode 100644
> index 0000000..25db3cd
> --- /dev/null
> +++ b/libavcodec/libxavs2.c
> @@ -0,0 +1,319 @@
> +/*
> + * AVS2 encoding using the xavs2 library
> + *
> + * Copyright (C) 2018 Yiqun Xu,   <yiqun.xu at vipl.ict.ac.cn>
> + *                    Falei Luo,  <falei.luo at gmail.com>
> + *                    Huiwen Ren, <hwrenx at gmail.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include <ctype.h>
> +
> +#include "xavs2.h"
> +#include "avcodec.h"
> +#include "mpeg12.h"
> +#include "internal.h"
> +#include "libavutil/internal.h"
> +#include "libavutil/mem.h"
> +#include "libavutil/opt.h"
> +#include "libavutil/imgutils.h"
> +#include "libavutil/avassert.h"
> +#include "libavutil/avstring.h"
> +#include "libavutil/common.h"
> +#include "libavutil/avutil.h"
> +
> +#define DELAY_FRAMES 8

This constant is not used anywhere?

> +
> +typedef struct XAVS2EContext {
> +    AVClass *class;
> +
> +    void* handle;

This field is unused.

> +
> +    int i_lcurow_threads;
> +    int i_initial_qp;
> +    int preset_level;
> +    int intra_period;

There is a common option AVCodecContext.gop_size (-g) which should probably be used rather than inventing a new private option with the same meaning.

> +
> +    void *encoder;
> +    char *xavs2_opts;
> +
> +    int b_hierarchical_reference;
> +    int num_b_frames;

You're already using AVCodecContext.max_b_frames instead below, delete this field.

> +
> +    xavs2_outpacket_t packet;
> +    xavs2_param_t *param;
> +
> +    const xavs2_api_t *api;
> +
> +} XAVS2EContext;
> +
> +static av_cold int xavs2_init(AVCodecContext *avctx)
> +{
> +    XAVS2EContext *cae= avctx->priv_data;
> +    int bit_depth;
> +
> +    char str_bd[16] = {0}, str_iqp[16] = {0}, str_w[16] = {0}, str_h[16] = {0};
> +    char str_preset[16] = {0}, str_hr[16] = {0}, str_bf[16] = {0};
> +    char str_iv[16] = {0}, str_TBR[16] = {0}, str_fr[16] = {0};

Can you make some sort of macro to simplify this code and avoid all of these stack variables?

Something like:

#define xavs2_set_opt(name, format, ...) do { \
        char opt_str[16]; \
        av_strlcatf(opt_str, sizeof(opt_str), format, __VA_ARGS__); \
        cae->api->opt_set2(cae->param, name, opt_str); \
    } while (0)

and then:

    xavs2_set_opt("width",  "%d", avctx->width);
    xavs2_set_opt("height", "%d", avctx->height);

etc.

> +
> +    bit_depth = avctx->pix_fmt == AV_PIX_FMT_YUV420P ? 8 : 10;
> +
> +    /* get API handler */
> +    cae->api = xavs2_api_get(bit_depth);

The documentation says that this is allowed to return NULL on failure, so it should be checked.

> +
> +    cae->param = cae->api->opt_alloc();
> +
> +    if (!cae->param) {
> +        av_log(avctx, AV_LOG_ERROR, "param alloc failed\n");
> +        return AVERROR(EINVAL);
> +    }
> +
> +    cae->api->opt_set2(cae->param,"rec","0");
> +    cae->api->opt_set2(cae->param,"log","0");
> +
> +    av_strlcatf(str_w, sizeof(str_w), "%d", avctx->width);
> +    av_strlcatf(str_h, sizeof(str_h), "%d", avctx->height);
> +    av_strlcatf(str_bd, sizeof(str_bd), "%d", bit_depth);
> +    av_strlcatf(str_iqp, sizeof(str_iqp), "%d", cae->i_initial_qp);
> +
> +    cae->api->opt_set2(cae->param,"width",str_w);
> +    cae->api->opt_set2(cae->param,"height",str_h);
> +    cae->api->opt_set2(cae->param, "initial_qp", str_iqp);
> +    cae->api->opt_set2(cae->param, "bitdepth", str_bd);
> +
> +    /* preset level */
> +    av_strlcatf(str_preset, sizeof(str_preset), "%d", cae->preset_level);
> +
> +    cae->api->opt_set2(cae->param,"preset", str_preset);
> +    /* bframes */
> +    av_log( avctx, AV_LOG_DEBUG,
> +            "HierarchicalReference %d, Number B Frames %d\n",
> +            cae->b_hierarchical_reference, cae->num_b_frames);

num_b_frames isn't used, it's avctx->max_b_frames.  (Not sure this log message is of any value, anyway?)

> +
> +    av_strlcatf(str_hr, sizeof(str_hr), "%d", cae->b_hierarchical_reference);
> +    //av_strlcatf(str_bf, sizeof(str_bf), "%d", cae->num_b_frames);

Please don't include dead comments.

> +    av_strlcatf(str_bf, sizeof(str_bf), "%d", avctx->max_b_frames);
> +
> +    cae->api->opt_set2(cae->param, "hierarchical_ref",  str_hr);
> +    cae->api->opt_set2(cae->param, "bframes",  str_bf);
> +
> +    if (cae->xavs2_opts) {
> +        AVDictionary *dict    = NULL;
> +        AVDictionaryEntry *en = NULL;
> +
> +        if (!av_dict_parse_string(&dict, cae->xavs2_opts, "=", ":", 0)) {
> +            while ((en = av_dict_get(dict, "", en, AV_DICT_IGNORE_SUFFIX))) {
> +
> +                int i_value = strtol(en->value, NULL, 10);
> +                av_strlcatf(str_iv, sizeof(str_iv), "%d", i_value);
> +
> +                int parse_ret = cae->api->opt_set2(cae->param, en->key, str_iv);
> +                if (parse_ret < 0) {
> +                    av_log(avctx, AV_LOG_WARNING,
> +                          "Invalid value for %s: %s\n", en->key, en->value);
> +                }

Are all option values necessarily integers?

Even if they are, since you're parsing a string and then restringifying it afterwards it might be easier to just pass the string directly (the code on the other side of the API does the value checking anyway).

> +
> +            }
> +            av_dict_free(&dict);
> +        }
> +    }
> +
> +    /* Rate control */
> +    int code;
> +
> +    if (avctx->bit_rate > 0) {
> +        cae->api->opt_set2(cae->param, "RateControl",  "1");
> +        av_strlcatf(str_TBR, sizeof(str_TBR), "%d", avctx->bit_rate);
> +        cae->api->opt_set2(cae->param, "TargetBitRate", str_TBR);
> +    }

What happens if there is no bitrate target?  Some sort of constant-quality mode?  Are there any parameters for that?

> +    cae->api->opt_set2(cae->param, "intraperiod", "50");

Probably shouldn't be hard-coded (use AVCodecContext.gop_size).

> +
> +    ff_mpeg12_find_best_frame_rate(avctx->framerate, &code, NULL, NULL, 0);
> +    av_strlcatf(str_fr, sizeof(str_fr), "%d", code);
> +
> +    cae->api->opt_set2(cae->param, "FrameRate", str_fr);
> +    cae->encoder = cae->api->encoder_create(cae->param);
> +
> +    if (!cae->encoder) {
> +        av_log(avctx,AV_LOG_ERROR, "Can not create encoder. Null pointer returned\n");

"Null pointer returned" is not useful for an end-user message.  Can you get any other information about what went wrong?  (If the library will log something the user can see that also works.)

> +        return AVERROR(EINVAL);
> +    }
> +
> +    return 0;
> +}
> +
> +static void xavs2_dump_frame_force(xavs2_picture_t *pic, AVFrame *frame, const int shift_in)
> +{
> +    int j, k;
> +    for (k = 0; k < 3; k++) {
> +        int i_stride = pic->img.i_stride[k];
> +        for (j = 0; j < pic->img.i_lines[k]; j++) {
> +            uint16_t *p_plane = (uint16_t *)pic->img.img_planes[k][j * i_stride];

Looks like a missing & - it's casting the uint8_t pixel value to a pointer.  (Has this case been tested?)

> +            int i;
> +            uint8_t *p_buffer = frame->data[k] + frame->linesize[k] * j;
> +            memset(p_plane, 0, i_stride);
> +            for (i = 0; i < pic->img.i_width[k]; i++) {
> +                p_plane[i] = p_buffer[i] << shift_in;
> +            }
> +        }
> +    }
> +}
> +
> +static void xavs2_dump_frame(xavs2_picture_t *pic, AVFrame *frame)
> +{
> +    int j, k;
> +    for (k = 0; k < 3; k++) {
> +        for (j = 0; j < pic->img.i_lines[k]; j++) {
> +            memcpy( pic->img.img_planes[k] + pic->img.i_stride[k] * j,
> +                    frame->data[k]+frame->linesize[k] * j,
> +                    pic->img.i_width[k] * pic->img.in_sample_size);
> +        }
> +    }
> +}

I find the names of these two functions are slightly confusing.  Maybe "xavs2_copy_input_frame_to_picture()" and "xavs2_copy_input_frame_to_picture_with_shift()", or something like that?

> +
> +static int xavs2_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
> +                      const AVFrame *frame, int *got_packet)
> +{
> +    XAVS2EContext *cae = avctx->priv_data;
> +    xavs2_picture_t pic;
> +
> +    /* create the XAVS2 video encoder */
> +    /* read frame data and send to the XAVS2 video encoder */
> +    if (cae->api->encoder_get_buffer(cae->encoder, &pic) < 0) {
> +        av_log(avctx,AV_LOG_ERROR, "failed to get frame buffer\n");
> +        return AVERROR(EINVAL);

This probably shouldn't be EINVAL, since that would indicate that the user has passed an invalid argument.

When can it happen?  If this can only happen on out-of-memory then ENOMEM; if there is some error case inside the library which can be hit then maybe AVERROR_EXTERNAL?

> +    }
> +    if (frame) {
> +        switch (frame->format) {
> +            case AV_PIX_FMT_YUV420P:
> +                if (pic.img.in_sample_size == pic.img.enc_sample_size) {
> +                    xavs2_dump_frame(&pic, frame);
> +                } else {
> +                    const int shift_in = atoi(cae->api->opt_get(cae->param, "SampleShift"));
> +                    xavs2_dump_frame_force(&pic, frame, shift_in);
> +                }
> +            break;
> +            case AV_PIX_FMT_YUV420P10:
> +                if (pic.img.in_sample_size == pic.img.enc_sample_size) {
> +                    xavs2_dump_frame(&pic, frame);
> +                } else {
> +                    av_log(avctx, AV_LOG_ERROR,
> +                          "Unsupportted input pixel format: 10-bit is not supported\n");

Maybe tell the user that the encoder is in 8-bit mode so 8-bit input is required?

This and the below failures might need to clean up pic as well (unless that happens automatically somehow).

> +                    return AVERROR(EINVAL);
> +                }
> +            break;
> +            default:
> +                av_log(avctx, AV_LOG_ERROR, "Unsupportted pixel format\n");

Typo: "Unsupported"

> +                return AVERROR(EINVAL);
> +            break;
> +        }
> +
> +        pic.i_state = 0;
> +        pic.i_pts  = frame->pts;
> +        pic.i_type = XAVS2_TYPE_AUTO;

Optional extra: this looks like you can trivially add support for by setting this to XAVS2_TYPE_IDR (or I) frame->pict_type is AV_PICTURE_TYPE_I?

> +
> +        int ret;

No mixed declarations and code; put it at the start of the function or block.

> +        ret = cae->api->encoder_encode(cae->encoder, &pic, &cae->packet);
> +
> +        if (ret) {
> +            av_log(avctx, AV_LOG_ERROR, "encode failed\n");
> +            return AVERROR(EINVAL);

Not EINVAL.  If you can't get any other information about what went wrong then AVERROR_EXTERNAL.

> +        }
> +
> +    } else {
> +        cae->api->encoder_encode(cae->encoder, NULL, &cae->packet);
> +    }
> +
> +    if ((cae->packet.len) && (cae->packet.state != XAVS2_STATE_FLUSH_END)){

Is there any particular reason why the packet is in the context structure but the picture is on the stack?  They look like they should have effectively equivalent lifetime and therefore be treated in the same way.

> +
> +        if (av_new_packet(pkt, cae->packet.len) < 0){
> +            av_log(avctx, AV_LOG_ERROR, "packet alloc failed\n");
> +            return AVERROR(EINVAL);

ENOMEM.  You probably need to unref the xavs2 packet as well if this happens?

> +        }
> +
> +        pkt->pts = cae->packet.pts;
> +        pkt->dts = cae->packet.dts;
> +
> +        memcpy(pkt->data, cae->packet.stream, cae->packet.len);
> +        pkt->size = cae->packet.len;
> +
> +        cae->api->encoder_packet_unref(cae->encoder, &cae->packet);
> +
> +        *got_packet = 1;
> +    } else {
> +        *got_packet = 0;
> +    }
> +
> +    return 0;
> +}
> +
> +static av_cold int xavs2_close(AVCodecContext *avctx)
> +{
> +    XAVS2EContext *cae = avctx->priv_data;
> +    /* destroy the encoder */
> +    if (cae->api) {
> +        cae->api->encoder_destroy(cae->encoder);
> +
> +        if (cae->param) {
> +            cae->api->opt_destroy(cae->param);
> +        }
> +    }
> +    return 0;
> +}
> +
> +#define OFFSET(x) offsetof(XAVS2EContext, x)
> +#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
> +
> +static const AVOption options[] = {
> +   { "i_lcurow_threads",           "number of parallel threads for rows"     ,       OFFSET(i_lcurow_threads),  AV_OPT_TYPE_INT,    {.i64 =  5 }, 1,   8,  VE },

This option is never used?

> +    { "i_initial_qp"    ,           "Quantization parameter",       OFFSET(i_initial_qp)    ,  AV_OPT_TYPE_INT,    {.i64 = 34 }, 1,  63,  VE },
> +    { "preset_level"    ,           "Speed level"           ,       OFFSET(preset_level)    ,  AV_OPT_TYPE_INT,    {.i64 =  0 }, 0,   9,  VE },

Should this perhaps always be called speed rather than preset?

Maybe also include some simple explanation of the value like "(higher is slower/better)".

> +    { "intra_period"    ,           "Intra period"          ,       OFFSET(intra_period)    ,  AV_OPT_TYPE_INT,    {.i64 =  4 }, 3, 100,  VE },
> +    { "hierarchical_ref",           "hierarchical reference",       OFFSET(b_hierarchical_reference)    ,  AV_OPT_TYPE_INT,    {.i64 =  1 }, 0, 1,  VE },

AV_OPT_TYPE_BOOL

> +    { "num_bframes"     ,           "number of B frames"    ,       OFFSET(num_b_frames)    ,  AV_OPT_TYPE_INT,    {.i64 =  7 }, 0,  15,  VE },
> +    { "xavs2-params",    "set the xavs2 configuration using a :-separated list of key=value parameters", OFFSET(xavs2_opts), AV_OPT_TYPE_STRING, { 0 }, 0, 0, VE },
> +    { NULL },
> +};
> +
> +static const AVClass xavs2_class = {
> +    .class_name = "XAVS2EContext",

"libxavs2" would be more consistent with other class names.

> +    .item_name  = av_default_item_name,
> +    .option     = options,
> +    .version    = LIBAVUTIL_VERSION_INT,
> +};
> +
> +static const AVCodecDefault xavs2_defaults[] = {
> +    { "b",                "0" },
> +    { NULL },
> +};
> +
> +AVCodec ff_libxavs2_encoder = {
> +    .name           = "libxavs2",
> +    .long_name      = NULL_IF_CONFIG_SMALL("libxavs2 AVS2-P2/IEEE1857.4"),
> +    .type           = AVMEDIA_TYPE_VIDEO,
> +    .id             = AV_CODEC_ID_AVS2,
> +    .priv_data_size = sizeof(XAVS2EContext),
> +    .init           = xavs2_init,
> +    .encode2        = xavs2_encode_frame,
> +    .close          = xavs2_close,
> +    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AUTO_THREADS,

Since AVCodecContext.thread_count is not used AUTO_THREADS probably shouldn't be set (see docs).

> +    .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P10, AV_PIX_FMT_NONE },
> +    .priv_class     = &xavs2_class,
> +    .defaults       = xavs2_defaults,
> +    .wrapper_name   = "libxavs2",
> +} ;

Thanks,

- Mark