[FFmpeg-devel] [PATCH] avcodec: add a native BBC Dirac VC-2 HQ encoder

Fri Jan 15 04:59:36 CET 2016

On Thu, Jan 14, 2016 at 2:00 PM, Rostislav Pehlivanov
<atomnuker at gmail.com> wrote:
[...]
>
> Signed-off-by: Rostislav Pehlivanov <atomnuker at gmail.com>
[...]
> +
> +#include "diracenc_transforms.h"
> +
> +#define COEF_LUT_TAB 1024
> +
> +typedef struct SubBand {
> +    int stride;
> +    int width;
> +    int height;
> +    dwtcoef *buf;
> +} SubBand;

struct is in your hands, generally order monotonically decreasing in
size to avoid unnecessary padding losses. Here it makes no difference,
but in other structs here it may very well. Of course, balance with
readability, so final judgement really up to you.

Second: this is likely just me, but I prefer explicit structs and not
typedef'ed ones as per the kernel coding styles. Seems like most of
FFmpeg does typedef's, so again use your judgement.

[...]
> +
> +static int rate_control(DiracEncContext *s, int sx, int sy)
> +{
> +    int quant_buf[2], bits_buf[2], quant = s->q_start, range = s->q_ceil/4;
> +    const int64_t top = s->slice_max_bits;
> +    const double percent = s->tolerance;
> +    const double bottom = top - top*(percent/100.0f);
> +    int bits = count_hq_slice(s, sx, sy, quant);
> +    while ((bits > top) || (bits < bottom)) {
> +        range *= bits > top ? +1 : -1;

this could use something like FFDIFFSIGN, unless bits == top can occur
(unlikely from your choice of names). Likely not speed relevant, so
whatever.

[...]
> +}
> +
> +/* VC-2 13.5.3 - hq_slice */
> +static int encode_hq_slice(AVCodecContext *avctx, void *arg)
> +{
> +    SliceArgs *slice_dat = arg;
> +    DiracEncContext *s = slice_dat->ctx;
> +    PutBitContext *pb = &slice_dat->pb;
> +    const int slice_x = slice_dat->x;
> +    const int slice_y = slice_dat->y;
> +    uint8_t quants[4][4];
> +    int p, j, level, orientation;
> +    int slice_bytes_start = put_bits_count(pb) >> 3;
> +    int quant_idx = rate_control(s, slice_x, slice_y);
> +
> +    avpriv_align_put_bits(pb);
> +    put_bits(pb, 8*s->prefix_bytes, 0);
> +    put_bits(pb, 8, quant_idx);
> +
> +    /* Slice quantization (slice_quantizers() in the specs) */
> +    for (level = 0; level < s->wavelet_depth; level++)
> +        for (orientation = !!level; orientation < 4; orientation++)
> +            quants[level][orientation] = FFMAX(quant_idx - s->quant[level][orientation], 0);
> +
> +    /* Luma + 2 Chroma planes */
> +    for (p = 0; p < 3; p++) {
> +        int bytes_start, bytes_len, pad_s, pad_c;
> +        avpriv_align_put_bits(pb);
> +        bytes_start = put_bits_count(pb) >> 3;
> +        put_bits(pb, 8, 0);
> +        for (level = 0; level < s->wavelet_depth; level++) {
> +            for (orientation = !!level; orientation < 4; orientation++) {

somewhere maybe clarify what !! does, i.e clamps to 0, 1.

> +                encode_subband(s, pb, slice_x, slice_y,
> +                               &s->plane[p].band[level][orientation],
> +                               quants[level][orientation]);
> +            }
> +        }

Maybe/maybe not unroll the loops, they are very small. Depends on
performance. My hunch is it is not worth it, but you never know.
[...]
> +
> +static void dwt_planes(DiracEncContext *s, const AVFrame *frame)
> +{
> +    int i, x, y, level;
> +    const int idx = s->wavelet_idx;
> +
> +    for (i = 0; i < 3; i++) {
> +        Plane *p = &s->plane[i];
> +        dwtcoef *buf = p->coef_buf;
> +        uint16_t *pix = (uint16_t *)frame->data[i];
> +
> +        for (y = 0; y < p->height; y++) {
> +            for (x = 0; x < p->width; x++) {
> +                buf[x] = pix[x] - s->diff_offset;
> +            }
> +            buf += p->coef_stride;
> +            pix += frame->linesize[i] >> 1;
> +        }
> +        memset(buf, 0, (p->coef_stride*p->dwt_height - p->height*p->width)*sizeof(dwtcoef));
> +
> +        /* DWT */
> +        for (level = s->wavelet_depth-1; level >= 0; level--) {
> +            SubBand *b = &p->band[level][0];
> +            s->t.dirac_subband_dwt[idx](&s->t, p->coef_buf, p->coef_stride,
> +                                        b->width, b->height);
> +        }
> +    }

Same comment about loops, just give it some simple thought.

> +}
> +
> +static av_cold int dirac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
> +                                      const AVFrame *frame, int *got_packet_ptr)
> +{
> +    int ret;
> +    double tpf;
> +    DiracEncContext *s = avctx->priv_data;
> +
> +    ret = ff_alloc_packet2(avctx, avpkt, avctx->width*avctx->height*sizeof(int32_t)*2, 0);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");

Maybe use ...output packet: av_err2str(ret) for a more complete message.

> +        return ret;
> +    }
> +
> +    s->avctx = avctx;
> +    s->prefix_bytes = 0;
> +    s->size_scaler = 32;
> +    s->custom_quant_matrix = 0;
> +    s->next_parse_offset = 0;
> +
> +    /* Rate control */
> +    tpf = s->avctx->time_base.num/(double)s->avctx->time_base.den;
> +    s->frame_max_bits = s->avctx->bit_rate*tpf;
> +    s->slice_max_bits = s->frame_max_bits/(s->num_x*s->num_y);

Make sure whatever needs to be nonzero here is nonzero, I have not checked.

[...]
> +}
[...]
> +static av_cold int dirac_encode_init(AVCodecContext *avctx)
> +{
> +    Plane *p;
> +    SubBand *b;
> +    int i, j, level, o, shift;
> +    DiracEncContext *s = avctx->priv_data;
> +
> +    s->q_ceil = FF_ARRAY_ELEMS(qscale_tab);
> +    s->next_parse_offset = 0;
> +    s->last_parse_offset = 0;
> +    s->strict_compliance = 1;
> +    s->q_start = lrint(s->q_ceil/2.3f);

Use lrintf, we have the neccessary libm/configure hacks in place.

> +
[...]
> +
> +    av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt,
> +                                     &s->chroma_x_shift,
> +                                     &s->chroma_y_shift);

See lavu/pixdesc.h: if you are not checking the return code, use
av_get_chroma_sub_sample. From these docs I wanted to add
av_warn_unused_result; but for some reason this was lost down
ffmpeg-devel...

> +
> +    /* Planes initialization */
> +    for (i = 0; i < 3; i++) {
> +        int w, h;
> +        p = &s->plane[i];
> +        p->width      = avctx->width  >> (i ? s->chroma_x_shift : 0);
> +        p->height     = avctx->height >> (i ? s->chroma_y_shift : 0);
> +        p->dwt_width  = w = FFALIGN(p->width,  (1 << s->wavelet_depth));
> +        p->dwt_height = h = FFALIGN(p->height, (1 << s->wavelet_depth));
> +        p->coef_stride = FFALIGN(p->dwt_width, 32);
> +        p->coef_buf = av_malloc(p->coef_stride*p->dwt_height*sizeof(dwtcoef));
> +        if (!p->coef_buf)
> +            av_log(avctx, AV_LOG_ERROR, "Unable to allocate memory! - %i %i %i\n", w, h, p->coef_stride);

See https://stackoverflow.com/questions/1893490/difference-between-format-specifiers-i-and-d-in-printf.
Does av_log do something different here? Also, maybe a more expressive
message is useful? Further seems like you don't recally do anything on
failure, is this correct?

> +        av_log(avctx, AV_LOG_WARNING, "Padded size = %i %i %i\n", p->dwt_width, p->dwt_height, p->coef_stride);

Is this a stray, looks like it always gets logged?

> +        for (level = s->wavelet_depth-1; level >= 0; level--) {
> +            w = w >> 1;
> +            h = h >> 1;
> +            for (o = 0; o < 4; o++) {

very minor, but o and 0 are not the best idea next to each other ;).
Please try something other than o.

> +                b = &p->band[level][o];
> +                b->width  = w;
> +                b->height = h;
> +                b->stride = p->coef_stride;
> +                shift = (o > 1)*b->height*b->stride + (o & 1)*b->width;
> +                b->buf = p->coef_buf + shift;
> +            }
> +        }
> +    }
> +
> +    s->coef_lut_len = av_malloc(2*COEF_LUT_TAB*s->q_ceil*sizeof(*s->coef_lut_len));
> +    s->coef_lut_val = av_malloc(2*COEF_LUT_TAB*s->q_ceil*sizeof(*s->coef_lut_val));

both unchecked. At this stage it is also important to reason what
happens when any subset of these allocations fail: what needs to be
freed, where is it freed, when to fail hard to avoid dereferences,
etc.

> +
> +    for (i = 0; i < s->q_ceil; i++) {
> +        for (j = -COEF_LUT_TAB; j < COEF_LUT_TAB; j++) {
> +            coeff_quantize_get(j, qscale_tab[i], qoffset_tab[i] + 2,
> +                               &s->coef_lut_len[2*i*COEF_LUT_TAB + j + COEF_LUT_TAB],
> +                               &s->coef_lut_val[2*i*COEF_LUT_TAB + j + COEF_LUT_TAB]);
> +        }
> +    }
> +
> +    if (diracenc_init_transforms(&s->t, s->plane[0].coef_stride, s->plane[0].dwt_height))
> +        av_log(avctx, AV_LOG_ERROR, "Unable to allocate memory!\n");

Again, more expressive messages, and reasoning about failure patterns.

> +
> +    /* Slices */
> +    s->num_x = s->plane[0].dwt_width/s->slice_width;
> +    s->num_y = s->plane[0].dwt_height/s->slice_height;
> +
> +    s->slice_args = av_malloc(s->num_x*s->num_y*sizeof(SliceArgs));

unchecked

> +
> +    return 0;
> +}
> +
> +#define DIRACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
> +static const AVOption diracenc_options[] = {
> +    {"tolerance",     "Max undershoot in %", offsetof(DiracEncContext, tolerance), AV_OPT_TYPE_DOUBLE, {.dbl = 2.0f}, 0.1f, 99.0f, DIRACENC_FLAGS, "tolerance"},

why 0.1f, 99.0f?
What is the max/min it can really handle? From the vals, I would sense
1-\epsilon, \epsilon. Concretely, make sure there is absolutely no use
case that deals with < 0.1, or > 99.0.

> +};
> +
> +static const AVClass diracenc_class = {
> +    "BBC Dirac VC-2 encoder",
> +    av_default_item_name,
> +    diracenc_options,
> +    LIBAVUTIL_VERSION_INT,
> +};

maybe use field intializers?

> +
> +static const AVCodecDefault diracenc_defaults[] = {
> +    { "b",              "500000000"   },
> +    { NULL },
> +};

same

[...]
> +static void dirac_subband_dwt_97(DiracTransforms *t,
> +                                 dwtcoef *data, int stride,
> +                                 int width, int height)
> +{
> +    int x, y;
> +    dwtcoef *datal = data, *synth = t->buffer, *synthl = synth;
> +    const int synth_width = width  << 1;
> +    const int synth_height = height << 1;
> +
> +    /*
> +     * Shift in one bit that is used for additional precision and copy
> +     * the data to the buffer.
> +     */
> +    for (y = 0; y < synth_height; y++) {
> +        for (x = 0; x < synth_width; x++)
> +            synthl[x] = datal[x] << 1;
> +        synthl += synth_width;
> +        datal += stride;
> +    }
> +
> +    /* Horizontal synthesis. */
> +    synthl = synth;
> +    for (y = 0; y < synth_height; y++) {
> +        /* Lifting stage 2. */
> +        synthl[1] -= (8*synthl[0] + 9*synthl[2] - synthl[4] + 8) >> 4;
> +        for (x = 1; x < width - 2; x++)
> +            synthl[2*x + 1] -= (9*synthl[2*x] + 9*synthl[2*x + 2] - synthl[2*x + 4] -
> +            synthl[2 * x - 2] + 8) >> 4;

minor nit: inconsistent spacing in array indexing
> +}
> +
> +int diracenc_init_transforms(DiracTransforms *s, int p_width, int p_height)
> +{
> +    s->dirac_subband_dwt[DIRAC_TRANSFORM_9_7]    = dirac_subband_dwt_97;
> +    s->dirac_subband_dwt[DIRAC_TRANSFORM_5_3]    = dirac_subband_dwt_53;
> +    s->dirac_subband_dwt[DIRAC_TRANSFORM_13_7]   = dirac_subband_noop;
> +    s->dirac_subband_dwt[DIRAC_TRANSFORM_HAAR]   = dirac_subband_noop;
> +    s->dirac_subband_dwt[DIRAC_TRANSFORM_HAAR_S] = dirac_subband_noop;
> +    s->dirac_subband_dwt[DIRAC_TRANSFORM_FIDEL]  = dirac_subband_noop;
> +    s->dirac_subband_dwt[DIRAC_TRANSFORM_9_7_I]  = dirac_subband_noop;
> +
> +    s->buffer = av_malloc(2*p_width*p_height*sizeof(dwtcoef));
> +    if (!s->buffer)
> +        return 1;

checked, but no oom message unlike others. Further, shouldn't it
return e.g ENOMEM?

> +
> +    return 0;
> +}

> +typedef struct DiracTransforms {
> +    dwtcoef *buffer;
> +    void (*dirac_subband_dwt[DIRAC_TRANSFORMS_NB])(struct DiracTransforms *t,
> +                                                   dwtcoef *data, int stride,
> +                                                   int width, int height);
> +} DiracTransforms;
> +
> +int diracenc_init_transforms(DiracTransforms *t, int p_width, int p_height);

would be happy to see av_warn_unused_result, though you do check it now. ;)

All from pure code inspection; no study whatsoever of the format/testing.

> +void diracenc_deinit_transforms(DiracTransforms *t);
> +
> +#endif /* AVCODEC_DIRACWAVELET_H */
> --
> 2.7.0.rc3
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel