[FFmpeg-devel] [PATCH 1/2] apng: Support inter-frame compression

Wed Jul 15 16:51:58 CEST 2015

Dana 13. 7. 2015. 10:17 osoba "Donny Yang" <work at kota.moe> napisala je:
>
> The current algorithm is just "try all the combinations, and pick the
best".
> It's not very fast either, probably due to a lot of copying, but will do
for
> an initial implementation.
> ---
>  libavcodec/pngenc.c | 423
+++++++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 387 insertions(+), 36 deletions(-)
>
> diff --git a/libavcodec/pngenc.c b/libavcodec/pngenc.c
> index 7a9d0b0..c78c8dc 100644
> --- a/libavcodec/pngenc.c
> +++ b/libavcodec/pngenc.c
> @@ -36,6 +36,14 @@
>
>  #define IOBUF_SIZE 4096
>
> +typedef struct APNGFctlChunk {
> +    uint32_t sequence_number;
> +    uint32_t width, height;
> +    uint32_t x_offset, y_offset;
> +    uint16_t delay_num, delay_den;
> +    uint8_t dispose_op, blend_op;
> +} APNGFctlChunk;
> +
>  typedef struct PNGEncContext {
>      AVClass *class;
>      HuffYUVEncDSPContext hdsp;
> @@ -59,6 +67,12 @@ typedef struct PNGEncContext {
>      // APNG
>      uint32_t palette_checksum;   // Used to ensure a single unique
palette
>      uint32_t sequence_number;
> +
> +    AVFrame *prev_frame;
> +    AVFrame *last_frame;
> +    APNGFctlChunk last_frame_fctl;
> +    uint8_t *last_frame_packet;
> +    size_t last_frame_packet_size;
>  } PNGEncContext;
>
>  static void png_get_interlaced_row(uint8_t *dst, int row_size,
> @@ -403,7 +417,7 @@ static int encode_frame(AVCodecContext *avctx, const
AVFrame *pict)
>      uint8_t *progressive_buf = NULL;
>      uint8_t *top_buf         = NULL;
>
> -    row_size = (avctx->width * s->bits_per_pixel + 7) >> 3;
> +    row_size = (pict->width * s->bits_per_pixel + 7) >> 3;
>
>      crow_base = av_malloc((row_size + 32) << (s->filter_type ==
PNG_FILTER_VALUE_MIXED));
>      if (!crow_base) {
> @@ -430,16 +444,16 @@ static int encode_frame(AVCodecContext *avctx,
const AVFrame *pict)
>          for (pass = 0; pass < NB_PASSES; pass++) {
>              /* NOTE: a pass is completely omitted if no pixels would be
>               * output */
> -            pass_row_size = ff_png_pass_row_size(pass,
s->bits_per_pixel, avctx->width);
> +            pass_row_size = ff_png_pass_row_size(pass,
s->bits_per_pixel, pict->width);
>              if (pass_row_size > 0) {
>                  top = NULL;
> -                for (y = 0; y < avctx->height; y++)
> +                for (y = 0; y < pict->height; y++)
>                      if ((ff_png_pass_ymask[pass] << (y & 7)) & 0x80) {
>                          ptr = p->data[0] + y * p->linesize[0];
>                          FFSWAP(uint8_t *, progressive_buf, top_buf);
>                          png_get_interlaced_row(progressive_buf,
pass_row_size,
>                                                 s->bits_per_pixel, pass,
> -                                               ptr, avctx->width);
> +                                               ptr, pict->width);
>                          crow = png_choose_filter(s, crow_buf,
progressive_buf,
>                                                   top, pass_row_size,
s->bits_per_pixel >> 3);
>                          png_write_row(avctx, crow, pass_row_size + 1);
> @@ -449,7 +463,7 @@ static int encode_frame(AVCodecContext *avctx, const
AVFrame *pict)
>          }
>      } else {
>          top = NULL;
> -        for (y = 0; y < avctx->height; y++) {
> +        for (y = 0; y < pict->height; y++) {
>              ptr = p->data[0] + y * p->linesize[0];
>              crow = png_choose_filter(s, crow_buf, ptr, top,
>                                       row_size, s->bits_per_pixel >> 3);
> @@ -530,6 +544,275 @@ static int encode_png(AVCodecContext *avctx,
AVPacket *pkt,
>      return 0;
>  }
>
> +static int apng_do_inverse_blend(AVFrame *output, const AVFrame *input,
> +                                  APNGFctlChunk *fctl_chunk, uint8_t bpp)
> +{
> +    // output: background, input: foreground
> +    // output the image such that when blended with the background, will
produce the foreground
> +
> +    uint32_t x, y;
> +    uint32_t leftmost_x = input->width;
> +    uint32_t rightmost_x = 0;
> +    uint32_t topmost_y = input->height;
> +    uint32_t bottommost_y = 0;
> +    const uint8_t *input_data = input->data[0];
> +    uint8_t *output_data = output->data[0];
> +    size_t input_linesize = input->linesize[0];
> +    size_t output_linesize;
> +
> +    // Find bounding box of changes
> +    for (y = 0; y < input->height; ++y) {
> +        for (x = 0; x < input->width; ++x) {
> +            if (!memcmp(input_data + bpp * x, output_data + bpp * x,
bpp))
> +                continue;
> +
> +            if (x < leftmost_x)
> +                leftmost_x = x;
> +            if (x >= rightmost_x)
> +                rightmost_x = x + 1;
> +            if (y < topmost_y)
> +                topmost_y = y;
> +            if (y >= bottommost_y)
> +                bottommost_y = y + 1;
> +        }
> +
> +        input_data += input_linesize;
> +        output_data += input_linesize;

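This is wrong: output_data walks the output frame, so it must advance by
output->linesize[0], not by input_linesize. The two frames are allocated
separately and need not share a stride. I think this causes the random
artifacts in the output.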

> +    }
> +
> +    if (leftmost_x == input->width && rightmost_x == 0) {
> +        // Empty frame
> +        // APNG does not support empty frames, so we make it a 1x1 frame
> +        leftmost_x = topmost_y = 0;
> +        rightmost_x = bottommost_y = 1;
> +    }
> +
> +    output_linesize = FFALIGN(output->width * bpp, 32);

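Again wrong: do not recompute a stride with FFALIGN() here. Use
output->linesize[0] as it was set when the frame buffer was allocated, and
leave it as it is.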

> +
> +    // Do actual inverse blending
> +    if (fctl_chunk->blend_op == APNG_BLEND_OP_SOURCE) {
> +        output_data = output->data[0];
> +        for (y = topmost_y; y < bottommost_y; ++y) {
> +            memcpy(output_data,
> +                   input->data[0] + input_linesize * y + bpp *
leftmost_x,
> +                   bpp * (rightmost_x - leftmost_x));
> +            output_data += output_linesize;
> +        }
> +    } else { // APNG_BLEND_OP_OVER
> +        size_t transparent_palette_index;
> +        uint32_t *palette;
> +
> +        switch (input->format) {
> +        case AV_PIX_FMT_RGBA64BE:
> +        case AV_PIX_FMT_YA16BE:
> +        case AV_PIX_FMT_RGBA:
> +        case AV_PIX_FMT_GRAY8A:
> +            break;
> +
> +        case AV_PIX_FMT_PAL8:
> +            palette = (uint32_t*)input->data[1];
> +            for (transparent_palette_index = 0;
transparent_palette_index < 256; ++transparent_palette_index)
> +                if (palette[transparent_palette_index] >> 24 == 0)
> +                    break;
> +            break;
> +
> +        default:
> +            // No alpha, so blending not possible
> +            return -1;
> +        }
> +
> +        for (y = topmost_y; y < bottommost_y; ++y) {
> +            uint8_t *foreground = input->data[0] + input_linesize * y +
bpp * leftmost_x;
> +            uint8_t *background = output->data[0] + input_linesize * y +
bpp * leftmost_x;

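You are using the wrong linesize here: background points into output, so the
row offset must be computed with output's linesize, not input_linesize.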

> +            output_data = output->data[0] + output_linesize * (y -
topmost_y);
> +            for (x = leftmost_x; x < rightmost_x; ++x, foreground +=
bpp, background += bpp, output_data += bpp) {
> +                if (!memcmp(foreground, background, bpp)) {
> +                    if (input->format == AV_PIX_FMT_PAL8) {
> +                        if (transparent_palette_index == 256) {
> +                            // Need fully transparent colour, but none
exists
> +                            return -1;
> +                        }
> +
> +                        *output_data = transparent_palette_index;
> +                    } else {
> +                        memset(output_data, 0, bpp);
> +                    }
> +                    continue;
> +                }
> +
> +                // Check for special alpha values, since full inverse
> +                // alpha-on-alpha blending is rarely possible, and when
> +                // possible, doesn't compress much better than
> +                // APNG_BLEND_OP_SOURCE blending
> +                switch (input->format) {
> +                case AV_PIX_FMT_RGBA64BE:
> +                    if (((uint16_t*)foreground)[3] == 0xffff ||
> +                        ((uint16_t*)background)[3] == 0)
> +                        break;
> +                    return -1;
> +
> +                case AV_PIX_FMT_YA16BE:
> +                    if (((uint16_t*)foreground)[1] == 0xffff ||
> +                        ((uint16_t*)background)[1] == 0)
> +                        break;
> +                    return -1;
> +
> +                case AV_PIX_FMT_RGBA:
> +                    if (foreground[3] == 0xff || background[3] == 0)
> +                        break;
> +                    return -1;
> +
> +                case AV_PIX_FMT_GRAY8A:
> +                    if (foreground[1] == 0xff || background[1] == 0)
> +                        break;
> +                    return -1;
> +
> +                case AV_PIX_FMT_PAL8:
> +                    if (palette[*foreground] >> 24 == 0xff ||
> +                        palette[*background] >> 24 == 0)
> +                        break;
> +                    return -1;
> +                }
> +
> +                memmove(output_data, foreground, bpp);
> +            }
> +        }
> +    }
> +
> +    output->width = rightmost_x - leftmost_x;
> +    output->height = bottommost_y - topmost_y;
> +    output->linesize[0] = output_linesize;

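Nope, can't do this: output->linesize[0] describes the layout of the buffer
that was actually allocated, so it must not be overwritten with a made-up
value. Keep the frame's linesize and address the cropped region through
data pointers/offsets instead.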

> +    fctl_chunk->width = output->width;
> +    fctl_chunk->height = output->height;
> +    fctl_chunk->x_offset = leftmost_x;
> +    fctl_chunk->y_offset = topmost_y;
> +
> +    return 0;
> +}
> +
> +static int apng_encode_frame(AVCodecContext *avctx, const AVFrame *pict,
> +                             APNGFctlChunk *best_fctl_chunk,
APNGFctlChunk *best_last_fctl_chunk)
> +{
> +    PNGEncContext *s = avctx->priv_data;
> +    int ret;
> +    uint32_t y;
> +    AVFrame* diffFrame;
> +    uint8_t bpp = (s->bits_per_pixel + 7) >> 3;
> +    uint8_t *original_bytestream, *original_bytestream_end;
> +    uint8_t *temp_bytestream = 0, *temp_bytestream_end;
> +    uint32_t best_sequence_number;
> +    uint8_t *best_bytestream;
> +    size_t best_bytestream_size = SIZE_MAX;
> +    APNGFctlChunk last_fctl_chunk = *best_last_fctl_chunk;
> +    APNGFctlChunk fctl_chunk = *best_fctl_chunk;
> +
> +    if (avctx->frame_number == 0) {
> +        best_fctl_chunk->width = pict->width;
> +        best_fctl_chunk->height = pict->height;
> +        best_fctl_chunk->x_offset = 0;
> +        best_fctl_chunk->y_offset = 0;
> +        best_fctl_chunk->blend_op = APNG_BLEND_OP_SOURCE;
> +        return encode_frame(avctx, pict);
> +    }
> +
> +    diffFrame = av_frame_alloc();
> +    if (!diffFrame)
> +        return AVERROR(ENOMEM);
> +
> +    diffFrame->format = pict->format;
> +    diffFrame->width = pict->width;
> +    diffFrame->height = pict->height;
> +    if ((ret = av_frame_get_buffer(diffFrame, 32)) < 0)
> +        goto fail;
> +
> +    original_bytestream = s->bytestream;
> +    original_bytestream_end = s->bytestream_end;
> +
> +    temp_bytestream = av_malloc(original_bytestream_end -
original_bytestream);
> +    temp_bytestream_end = temp_bytestream + (original_bytestream_end -
original_bytestream);
> +    if (!temp_bytestream) {
> +        ret = AVERROR(ENOMEM);
> +        goto fail;
> +    }
> +
> +    for (last_fctl_chunk.dispose_op = 0; last_fctl_chunk.dispose_op < 3;
++last_fctl_chunk.dispose_op) {
> +        // 0: APNG_DISPOSE_OP_NONE
> +        // 1: APNG_DISPOSE_OP_BACKGROUND
> +        // 2: APNG_DISPOSE_OP_PREVIOUS
> +
> +        for (fctl_chunk.blend_op = 0; fctl_chunk.blend_op < 2;
++fctl_chunk.blend_op) {
> +            // 0: APNG_BLEND_OP_SOURCE
> +            // 1: APNG_BLEND_OP_OVER
> +
> +            uint32_t original_sequence_number = s->sequence_number,
sequence_number;
> +            uint8_t *bytestream_start = s->bytestream;
> +            size_t bytestream_size;
> +
> +            // Do disposal
> +            if (last_fctl_chunk.dispose_op != APNG_DISPOSE_OP_PREVIOUS) {
> +                memcpy(diffFrame->data[0], s->last_frame->data[0],
> +                       s->last_frame->linesize[0] *
s->last_frame->height);
> +
> +                if (last_fctl_chunk.dispose_op ==
APNG_DISPOSE_OP_BACKGROUND) {
> +                    for (y = last_fctl_chunk.y_offset; y <
last_fctl_chunk.y_offset + last_fctl_chunk.height; ++y) {
> +                        size_t row_start = s->last_frame->linesize[0] *
y + bpp * last_fctl_chunk.x_offset;
> +                        memset(diffFrame->data[0] + row_start, 0, bpp *
last_fctl_chunk.width);
> +                    }
> +                }
> +            } else {
> +                if (!s->prev_frame)
> +                    continue;
> +
> +                memcpy(diffFrame->data[0], s->prev_frame->data[0],
> +                       s->prev_frame->linesize[0] *
s->prev_frame->height);
> +            }
> +
> +            // Do inverse blending
> +            if (apng_do_inverse_blend(diffFrame, pict, &fctl_chunk, bpp)
< 0)
> +                continue;
> +
> +            // Do encoding
> +            ret = encode_frame(avctx, diffFrame);
> +            sequence_number = s->sequence_number;
> +            s->sequence_number = original_sequence_number;
> +            bytestream_size = s->bytestream - bytestream_start;
> +            s->bytestream = bytestream_start;
> +            if (ret < 0)
> +                goto fail;
> +
> +            if (bytestream_size < best_bytestream_size) {
> +                *best_fctl_chunk = fctl_chunk;
> +                *best_last_fctl_chunk = last_fctl_chunk;
> +
> +                best_sequence_number = sequence_number;
> +                best_bytestream = s->bytestream;
> +                best_bytestream_size = bytestream_size;
> +
> +                if (best_bytestream == original_bytestream) {
> +                    s->bytestream = temp_bytestream;
> +                    s->bytestream_end = temp_bytestream_end;
> +                } else {
> +                    s->bytestream = original_bytestream;
> +                    s->bytestream_end = original_bytestream_end;
> +                }
> +            }
> +        }
> +    }
> +
> +    s->sequence_number = best_sequence_number;
> +    s->bytestream = original_bytestream + best_bytestream_size;
> +    s->bytestream_end = original_bytestream_end;
> +    if (best_bytestream != original_bytestream)
> +        memcpy(original_bytestream, best_bytestream,
best_bytestream_size);
> +
> +    ret = 0;
> +
> +fail:
> +    av_freep(&temp_bytestream);
> +    av_frame_free(&diffFrame);
> +    return ret;
> +}
> +
>  static int encode_apng(AVCodecContext *avctx, AVPacket *pkt,
>                         const AVFrame *pict, int *got_packet)
>  {
> @@ -537,9 +820,9 @@ static int encode_apng(AVCodecContext *avctx,
AVPacket *pkt,
>      int ret;
>      int enc_row_size;
>      size_t max_packet_size;
> -    uint8_t buf[26];
> +    APNGFctlChunk fctl_chunk;
>
> -    if (avctx->codec_id == AV_CODEC_ID_APNG && s->color_type ==
PNG_COLOR_TYPE_PALETTE) {
> +    if (pict && avctx->codec_id == AV_CODEC_ID_APNG && s->color_type ==
PNG_COLOR_TYPE_PALETTE) {
>          uint32_t checksum = ~av_crc(av_crc_get_table(AV_CRC_32_IEEE_LE),
~0U, pict->data[1], 256 * sizeof(uint32_t));
>
>          if (avctx->frame_number == 0) {
> @@ -560,47 +843,111 @@ static int encode_apng(AVCodecContext *avctx,
AVPacket *pkt,
>          );
>      if (max_packet_size > INT_MAX)
>          return AVERROR(ENOMEM);
> -    ret = ff_alloc_packet2(avctx, pkt, max_packet_size);
> -    if (ret < 0)
> -        return ret;
> -
> -    s->bytestream_start =
> -    s->bytestream       = pkt->data;
> -    s->bytestream_end   = pkt->data + pkt->size;
>
>      if (avctx->frame_number == 0) {
> +        s->bytestream = avctx->extradata = av_malloc(FF_MIN_BUFFER_SIZE);
> +        if (!avctx->extradata)
> +            return AVERROR(ENOMEM);
> +
>          ret = encode_headers(avctx, pict);
>          if (ret < 0)
>              return ret;
>
> -        avctx->extradata = av_malloc(s->bytestream -
s->bytestream_start);
> -        if (!avctx->extradata)
> +        avctx->extradata_size = s->bytestream - avctx->extradata;
> +
> +        s->last_frame_packet = av_malloc(max_packet_size);
> +        if (!s->last_frame_packet)
>              return AVERROR(ENOMEM);
> -        avctx->extradata_size = s->bytestream - s->bytestream_start;
> -        memcpy(avctx->extradata, s->bytestream_start, s->bytestream -
s->bytestream_start);
> +    } else if (s->last_frame) {
> +        ret = ff_alloc_packet2(avctx, pkt, max_packet_size);
> +        if (ret < 0)
> +            return ret;
>
> -        s->bytestream = s->bytestream_start;
> +        memcpy(pkt->data, s->last_frame_packet,
s->last_frame_packet_size);
> +        pkt->size = s->last_frame_packet_size;
> +        pkt->pts = pkt->dts = s->last_frame->pts;
>      }
>
> -    AV_WB32(buf, s->sequence_number);
> -    AV_WB32(buf + 4, avctx->width);
> -    AV_WB32(buf + 8, avctx->height);
> -    AV_WB32(buf + 12, 0); // x offset
> -    AV_WB32(buf + 16, 0); // y offset
> -    AV_WB16(buf + 20, 0); // delay numerator (filled in during muxing)
> -    AV_WB16(buf + 22, 0); // delay denominator
> -    buf[24] = APNG_DISPOSE_OP_BACKGROUND;
> -    buf[25] = APNG_BLEND_OP_SOURCE;
> -    png_write_chunk(&s->bytestream, MKTAG('f', 'c', 'T', 'L'), buf, 26);
> -    ++s->sequence_number;
> +    if (pict) {
> +        s->bytestream_start =
> +        s->bytestream       = s->last_frame_packet;
> +        s->bytestream_end   = s->bytestream + max_packet_size;
>
> -    ret = encode_frame(avctx, pict);
> -    if (ret < 0)
> -        return ret;
> +        // We're encoding the frame first, so we have to do a bit of
shuffling around
> +        // to have the image data write to the correct place in the
buffer
> +        fctl_chunk.sequence_number = s->sequence_number;
> +        ++s->sequence_number;
> +        s->bytestream += 26 + 12;
>
> -    pkt->size = s->bytestream - s->bytestream_start;
> -    pkt->flags |= AV_PKT_FLAG_KEY;
> -    *got_packet = 1;
> +        ret = apng_encode_frame(avctx, pict, &fctl_chunk,
&s->last_frame_fctl);
> +        if (ret < 0)
> +            return ret;
> +
> +        fctl_chunk.delay_num = 0; // delay filled in during muxing
> +        fctl_chunk.delay_den = 0;
> +    } else {
> +        s->last_frame_fctl.dispose_op = APNG_DISPOSE_OP_NONE;
> +    }
> +
> +    if (s->last_frame) {
> +        uint8_t* last_fctl_chunk_start = pkt->data;
> +        uint8_t buf[26];
> +
> +        AV_WB32(buf + 0, s->last_frame_fctl.sequence_number);
> +        AV_WB32(buf + 4, s->last_frame_fctl.width);
> +        AV_WB32(buf + 8, s->last_frame_fctl.height);
> +        AV_WB32(buf + 12, s->last_frame_fctl.x_offset);
> +        AV_WB32(buf + 16, s->last_frame_fctl.y_offset);
> +        AV_WB16(buf + 20, s->last_frame_fctl.delay_num);
> +        AV_WB16(buf + 22, s->last_frame_fctl.delay_den);
> +        buf[24] = s->last_frame_fctl.dispose_op;
> +        buf[25] = s->last_frame_fctl.blend_op;
> +        png_write_chunk(&last_fctl_chunk_start, MKTAG('f', 'c', 'T',
'L'), buf, 26);
> +
> +        *got_packet = 1;
> +    }
> +
> +    if (pict) {
> +        if (!s->last_frame) {
> +            s->last_frame = av_frame_alloc();
> +            if (!s->last_frame)
> +                return AVERROR(ENOMEM);
> +        } else if (s->last_frame_fctl.dispose_op !=
APNG_DISPOSE_OP_PREVIOUS) {
> +            if (!s->prev_frame) {
> +                s->prev_frame = av_frame_alloc();
> +                if (!s->prev_frame)
> +                    return AVERROR(ENOMEM);
> +
> +                s->prev_frame->format = pict->format;
> +                s->prev_frame->width = pict->width;
> +                s->prev_frame->height = pict->height;
> +                if ((ret = av_frame_get_buffer(s->prev_frame, 32)) < 0)
> +                    return ret;
> +            }
> +
> +            // Do disposal, but not blending
> +            memcpy(s->prev_frame->data[0], s->last_frame->data[0],
> +                   s->last_frame->linesize[0] * s->last_frame->height);
> +            if (s->last_frame_fctl.dispose_op ==
APNG_DISPOSE_OP_BACKGROUND) {
> +                uint32_t y;
> +                uint8_t bpp = (s->bits_per_pixel + 7) >> 3;
> +                for (y = s->last_frame_fctl.y_offset; y <
s->last_frame_fctl.y_offset + s->last_frame_fctl.height; ++y) {
> +                    size_t row_start = s->last_frame->linesize[0] * y +
bpp * s->last_frame_fctl.x_offset;
> +                    memset(s->prev_frame->data[0] + row_start, 0, bpp *
s->last_frame_fctl.width);
> +                }
> +            }
> +        }
> +
> +        av_frame_unref(s->last_frame);
> +        ret = av_frame_ref(s->last_frame, (AVFrame*)pict);
> +        if (ret < 0)
> +            return ret;
> +
> +        s->last_frame_fctl = fctl_chunk;
> +        s->last_frame_packet_size = s->bytestream - s->bytestream_start;
> +    } else {
> +        av_frame_free(&s->last_frame);
> +    }
>
>      return 0;
>  }
> @@ -714,6 +1061,9 @@ static av_cold int png_enc_close(AVCodecContext
*avctx)
>
>      deflateEnd(&s->zstream);
>      av_frame_free(&avctx->coded_frame);
> +    av_frame_free(&s->last_frame);
> +    av_frame_free(&s->prev_frame);
> +    av_freep(&s->last_frame_packet);
>      return 0;
>  }
>
> @@ -769,6 +1119,7 @@ AVCodec ff_apng_encoder = {
>      .init           = png_enc_init,
>      .close          = png_enc_close,
>      .encode2        = encode_apng,
> +    .capabilities   = CODEC_CAP_DELAY,
>      .pix_fmts       = (const enum AVPixelFormat[]) {
>          AV_PIX_FMT_RGB24, AV_PIX_FMT_RGBA,
>          AV_PIX_FMT_RGB48BE, AV_PIX_FMT_RGBA64BE,
> --
> 2.4.5
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel