[FFmpeg-devel] [PATCH 1/2] apng: Support inter-frame compression
Paul B Mahol
onemda at gmail.com
Wed Jul 15 16:51:58 CEST 2015
On 13 Jul 2015 at 10:17, "Donny Yang" <work at kota.moe> wrote:
>
> The current algorithm is just "try all the combinations, and pick the best".
> It's not very fast either, probably due to a lot of copying, but will do for
> an initial implementation.
> ---
> libavcodec/pngenc.c | 423
+++++++++++++++++++++++++++++++++++++++++++++++-----
> 1 file changed, 387 insertions(+), 36 deletions(-)
>
> diff --git a/libavcodec/pngenc.c b/libavcodec/pngenc.c
> index 7a9d0b0..c78c8dc 100644
> --- a/libavcodec/pngenc.c
> +++ b/libavcodec/pngenc.c
> @@ -36,6 +36,14 @@
>
> #define IOBUF_SIZE 4096
>
> +typedef struct APNGFctlChunk {
> + uint32_t sequence_number;
> + uint32_t width, height;
> + uint32_t x_offset, y_offset;
> + uint16_t delay_num, delay_den;
> + uint8_t dispose_op, blend_op;
> +} APNGFctlChunk;
> +
> typedef struct PNGEncContext {
> AVClass *class;
> HuffYUVEncDSPContext hdsp;
> @@ -59,6 +67,12 @@ typedef struct PNGEncContext {
> // APNG
> uint32_t palette_checksum; // Used to ensure a single unique
palette
> uint32_t sequence_number;
> +
> + AVFrame *prev_frame;
> + AVFrame *last_frame;
> + APNGFctlChunk last_frame_fctl;
> + uint8_t *last_frame_packet;
> + size_t last_frame_packet_size;
> } PNGEncContext;
>
> static void png_get_interlaced_row(uint8_t *dst, int row_size,
> @@ -403,7 +417,7 @@ static int encode_frame(AVCodecContext *avctx, const
AVFrame *pict)
> uint8_t *progressive_buf = NULL;
> uint8_t *top_buf = NULL;
>
> - row_size = (avctx->width * s->bits_per_pixel + 7) >> 3;
> + row_size = (pict->width * s->bits_per_pixel + 7) >> 3;
>
> crow_base = av_malloc((row_size + 32) << (s->filter_type ==
PNG_FILTER_VALUE_MIXED));
> if (!crow_base) {
> @@ -430,16 +444,16 @@ static int encode_frame(AVCodecContext *avctx,
const AVFrame *pict)
> for (pass = 0; pass < NB_PASSES; pass++) {
> /* NOTE: a pass is completely omitted if no pixels would be
> * output */
> - pass_row_size = ff_png_pass_row_size(pass,
s->bits_per_pixel, avctx->width);
> + pass_row_size = ff_png_pass_row_size(pass,
s->bits_per_pixel, pict->width);
> if (pass_row_size > 0) {
> top = NULL;
> - for (y = 0; y < avctx->height; y++)
> + for (y = 0; y < pict->height; y++)
> if ((ff_png_pass_ymask[pass] << (y & 7)) & 0x80) {
> ptr = p->data[0] + y * p->linesize[0];
> FFSWAP(uint8_t *, progressive_buf, top_buf);
> png_get_interlaced_row(progressive_buf,
pass_row_size,
> s->bits_per_pixel, pass,
> - ptr, avctx->width);
> + ptr, pict->width);
> crow = png_choose_filter(s, crow_buf,
progressive_buf,
> top, pass_row_size,
s->bits_per_pixel >> 3);
> png_write_row(avctx, crow, pass_row_size + 1);
> @@ -449,7 +463,7 @@ static int encode_frame(AVCodecContext *avctx, const
AVFrame *pict)
> }
> } else {
> top = NULL;
> - for (y = 0; y < avctx->height; y++) {
> + for (y = 0; y < pict->height; y++) {
> ptr = p->data[0] + y * p->linesize[0];
> crow = png_choose_filter(s, crow_buf, ptr, top,
> row_size, s->bits_per_pixel >> 3);
> @@ -530,6 +544,275 @@ static int encode_png(AVCodecContext *avctx,
AVPacket *pkt,
> return 0;
> }
>
> +static int apng_do_inverse_blend(AVFrame *output, const AVFrame *input,
> + APNGFctlChunk *fctl_chunk, uint8_t bpp)
> +{
> + // output: background, input: foreground
> + // output the image such that when blended with the background, will
produce the foreground
> +
> + uint32_t x, y;
> + uint32_t leftmost_x = input->width;
> + uint32_t rightmost_x = 0;
> + uint32_t topmost_y = input->height;
> + uint32_t bottommost_y = 0;
> + const uint8_t *input_data = input->data[0];
> + uint8_t *output_data = output->data[0];
> + size_t input_linesize = input->linesize[0];
> + size_t output_linesize;
> +
> + // Find bounding box of changes
> + for (y = 0; y < input->height; ++y) {
> + for (x = 0; x < input->width; ++x) {
> + if (!memcmp(input_data + bpp * x, output_data + bpp * x,
bpp))
> + continue;
> +
> + if (x < leftmost_x)
> + leftmost_x = x;
> + if (x >= rightmost_x)
> + rightmost_x = x + 1;
> + if (y < topmost_y)
> + topmost_y = y;
> + if (y >= bottommost_y)
> + bottommost_y = y + 1;
> + }
> +
> + input_data += input_linesize;
> + output_data += input_linesize;
This is wrong: output_data is advanced by input_linesize instead of the output frame's own linesize, and I think this causes random artifacts in the output.
> + }
> +
> + if (leftmost_x == input->width && rightmost_x == 0) {
> + // Empty frame
> + // APNG does not support empty frames, so we make it a 1x1 frame
> + leftmost_x = topmost_y = 0;
> + rightmost_x = bottommost_y = 1;
> + }
> +
> + output_linesize = FFALIGN(output->width * bpp, 32);
This is wrong again: do not recompute the linesize; leave output->linesize[0] as it is.
> +
> + // Do actual inverse blending
> + if (fctl_chunk->blend_op == APNG_BLEND_OP_SOURCE) {
> + output_data = output->data[0];
> + for (y = topmost_y; y < bottommost_y; ++y) {
> + memcpy(output_data,
> + input->data[0] + input_linesize * y + bpp *
leftmost_x,
> + bpp * (rightmost_x - leftmost_x));
> + output_data += output_linesize;
> + }
> + } else { // APNG_BLEND_OP_OVER
> + size_t transparent_palette_index;
> + uint32_t *palette;
> +
> + switch (input->format) {
> + case AV_PIX_FMT_RGBA64BE:
> + case AV_PIX_FMT_YA16BE:
> + case AV_PIX_FMT_RGBA:
> + case AV_PIX_FMT_GRAY8A:
> + break;
> +
> + case AV_PIX_FMT_PAL8:
> + palette = (uint32_t*)input->data[1];
> + for (transparent_palette_index = 0;
transparent_palette_index < 256; ++transparent_palette_index)
> + if (palette[transparent_palette_index] >> 24 == 0)
> + break;
> + break;
> +
> + default:
> + // No alpha, so blending not possible
> + return -1;
> + }
> +
> + for (y = topmost_y; y < bottommost_y; ++y) {
> + uint8_t *foreground = input->data[0] + input_linesize * y +
bpp * leftmost_x;
> + uint8_t *background = output->data[0] + input_linesize * y +
bpp * leftmost_x;
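You are using the wrong linesize here: background points into output->data[0], so it must be indexed with the output frame's linesize, not input_linesize.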
> + output_data = output->data[0] + output_linesize * (y -
topmost_y);
> + for (x = leftmost_x; x < rightmost_x; ++x, foreground +=
bpp, background += bpp, output_data += bpp) {
> + if (!memcmp(foreground, background, bpp)) {
> + if (input->format == AV_PIX_FMT_PAL8) {
> + if (transparent_palette_index == 256) {
> + // Need fully transparent colour, but none
exists
> + return -1;
> + }
> +
> + *output_data = transparent_palette_index;
> + } else {
> + memset(output_data, 0, bpp);
> + }
> + continue;
> + }
> +
> + // Check for special alpha values, since full inverse
> + // alpha-on-alpha blending is rarely possible, and when
> + // possible, doesn't compress much better than
> + // APNG_BLEND_OP_SOURCE blending
> + switch (input->format) {
> + case AV_PIX_FMT_RGBA64BE:
> + if (((uint16_t*)foreground)[3] == 0xffff ||
> + ((uint16_t*)background)[3] == 0)
> + break;
> + return -1;
> +
> + case AV_PIX_FMT_YA16BE:
> + if (((uint16_t*)foreground)[1] == 0xffff ||
> + ((uint16_t*)background)[1] == 0)
> + break;
> + return -1;
> +
> + case AV_PIX_FMT_RGBA:
> + if (foreground[3] == 0xff || background[3] == 0)
> + break;
> + return -1;
> +
> + case AV_PIX_FMT_GRAY8A:
> + if (foreground[1] == 0xff || background[1] == 0)
> + break;
> + return -1;
> +
> + case AV_PIX_FMT_PAL8:
> + if (palette[*foreground] >> 24 == 0xff ||
> + palette[*background] >> 24 == 0)
> + break;
> + return -1;
> + }
> +
> + memmove(output_data, foreground, bpp);
> + }
> + }
> + }
> +
> + output->width = rightmost_x - leftmost_x;
> + output->height = bottommost_y - topmost_y;
> + output->linesize[0] = output_linesize;
No, you can't do this: output->linesize[0] must not be overwritten after the frame buffer has been allocated.
> + fctl_chunk->width = output->width;
> + fctl_chunk->height = output->height;
> + fctl_chunk->x_offset = leftmost_x;
> + fctl_chunk->y_offset = topmost_y;
> +
> + return 0;
> +}
> +
> +static int apng_encode_frame(AVCodecContext *avctx, const AVFrame *pict,
> + APNGFctlChunk *best_fctl_chunk,
APNGFctlChunk *best_last_fctl_chunk)
> +{
> + PNGEncContext *s = avctx->priv_data;
> + int ret;
> + uint32_t y;
> + AVFrame* diffFrame;
> + uint8_t bpp = (s->bits_per_pixel + 7) >> 3;
> + uint8_t *original_bytestream, *original_bytestream_end;
> + uint8_t *temp_bytestream = 0, *temp_bytestream_end;
> + uint32_t best_sequence_number;
> + uint8_t *best_bytestream;
> + size_t best_bytestream_size = SIZE_MAX;
> + APNGFctlChunk last_fctl_chunk = *best_last_fctl_chunk;
> + APNGFctlChunk fctl_chunk = *best_fctl_chunk;
> +
> + if (avctx->frame_number == 0) {
> + best_fctl_chunk->width = pict->width;
> + best_fctl_chunk->height = pict->height;
> + best_fctl_chunk->x_offset = 0;
> + best_fctl_chunk->y_offset = 0;
> + best_fctl_chunk->blend_op = APNG_BLEND_OP_SOURCE;
> + return encode_frame(avctx, pict);
> + }
> +
> + diffFrame = av_frame_alloc();
> + if (!diffFrame)
> + return AVERROR(ENOMEM);
> +
> + diffFrame->format = pict->format;
> + diffFrame->width = pict->width;
> + diffFrame->height = pict->height;
> + if ((ret = av_frame_get_buffer(diffFrame, 32)) < 0)
> + goto fail;
> +
> + original_bytestream = s->bytestream;
> + original_bytestream_end = s->bytestream_end;
> +
> + temp_bytestream = av_malloc(original_bytestream_end -
original_bytestream);
> + temp_bytestream_end = temp_bytestream + (original_bytestream_end -
original_bytestream);
> + if (!temp_bytestream) {
> + ret = AVERROR(ENOMEM);
> + goto fail;
> + }
> +
> + for (last_fctl_chunk.dispose_op = 0; last_fctl_chunk.dispose_op < 3;
++last_fctl_chunk.dispose_op) {
> + // 0: APNG_DISPOSE_OP_NONE
> + // 1: APNG_DISPOSE_OP_BACKGROUND
> + // 2: APNG_DISPOSE_OP_PREVIOUS
> +
> + for (fctl_chunk.blend_op = 0; fctl_chunk.blend_op < 2;
++fctl_chunk.blend_op) {
> + // 0: APNG_BLEND_OP_SOURCE
> + // 1: APNG_BLEND_OP_OVER
> +
> + uint32_t original_sequence_number = s->sequence_number,
sequence_number;
> + uint8_t *bytestream_start = s->bytestream;
> + size_t bytestream_size;
> +
> + // Do disposal
> + if (last_fctl_chunk.dispose_op != APNG_DISPOSE_OP_PREVIOUS) {
> + memcpy(diffFrame->data[0], s->last_frame->data[0],
> + s->last_frame->linesize[0] *
s->last_frame->height);
> +
> + if (last_fctl_chunk.dispose_op ==
APNG_DISPOSE_OP_BACKGROUND) {
> + for (y = last_fctl_chunk.y_offset; y <
last_fctl_chunk.y_offset + last_fctl_chunk.height; ++y) {
> + size_t row_start = s->last_frame->linesize[0] *
y + bpp * last_fctl_chunk.x_offset;
> + memset(diffFrame->data[0] + row_start, 0, bpp *
last_fctl_chunk.width);
> + }
> + }
> + } else {
> + if (!s->prev_frame)
> + continue;
> +
> + memcpy(diffFrame->data[0], s->prev_frame->data[0],
> + s->prev_frame->linesize[0] *
s->prev_frame->height);
> + }
> +
> + // Do inverse blending
> + if (apng_do_inverse_blend(diffFrame, pict, &fctl_chunk, bpp)
< 0)
> + continue;
> +
> + // Do encoding
> + ret = encode_frame(avctx, diffFrame);
> + sequence_number = s->sequence_number;
> + s->sequence_number = original_sequence_number;
> + bytestream_size = s->bytestream - bytestream_start;
> + s->bytestream = bytestream_start;
> + if (ret < 0)
> + goto fail;
> +
> + if (bytestream_size < best_bytestream_size) {
> + *best_fctl_chunk = fctl_chunk;
> + *best_last_fctl_chunk = last_fctl_chunk;
> +
> + best_sequence_number = sequence_number;
> + best_bytestream = s->bytestream;
> + best_bytestream_size = bytestream_size;
> +
> + if (best_bytestream == original_bytestream) {
> + s->bytestream = temp_bytestream;
> + s->bytestream_end = temp_bytestream_end;
> + } else {
> + s->bytestream = original_bytestream;
> + s->bytestream_end = original_bytestream_end;
> + }
> + }
> + }
> + }
> +
> + s->sequence_number = best_sequence_number;
> + s->bytestream = original_bytestream + best_bytestream_size;
> + s->bytestream_end = original_bytestream_end;
> + if (best_bytestream != original_bytestream)
> + memcpy(original_bytestream, best_bytestream,
best_bytestream_size);
> +
> + ret = 0;
> +
> +fail:
> + av_freep(&temp_bytestream);
> + av_frame_free(&diffFrame);
> + return ret;
> +}
> +
> static int encode_apng(AVCodecContext *avctx, AVPacket *pkt,
> const AVFrame *pict, int *got_packet)
> {
> @@ -537,9 +820,9 @@ static int encode_apng(AVCodecContext *avctx,
AVPacket *pkt,
> int ret;
> int enc_row_size;
> size_t max_packet_size;
> - uint8_t buf[26];
> + APNGFctlChunk fctl_chunk;
>
> - if (avctx->codec_id == AV_CODEC_ID_APNG && s->color_type ==
PNG_COLOR_TYPE_PALETTE) {
> + if (pict && avctx->codec_id == AV_CODEC_ID_APNG && s->color_type ==
PNG_COLOR_TYPE_PALETTE) {
> uint32_t checksum = ~av_crc(av_crc_get_table(AV_CRC_32_IEEE_LE),
~0U, pict->data[1], 256 * sizeof(uint32_t));
>
> if (avctx->frame_number == 0) {
> @@ -560,47 +843,111 @@ static int encode_apng(AVCodecContext *avctx,
AVPacket *pkt,
> );
> if (max_packet_size > INT_MAX)
> return AVERROR(ENOMEM);
> - ret = ff_alloc_packet2(avctx, pkt, max_packet_size);
> - if (ret < 0)
> - return ret;
> -
> - s->bytestream_start =
> - s->bytestream = pkt->data;
> - s->bytestream_end = pkt->data + pkt->size;
>
> if (avctx->frame_number == 0) {
> + s->bytestream = avctx->extradata = av_malloc(FF_MIN_BUFFER_SIZE);
> + if (!avctx->extradata)
> + return AVERROR(ENOMEM);
> +
> ret = encode_headers(avctx, pict);
> if (ret < 0)
> return ret;
>
> - avctx->extradata = av_malloc(s->bytestream -
s->bytestream_start);
> - if (!avctx->extradata)
> + avctx->extradata_size = s->bytestream - avctx->extradata;
> +
> + s->last_frame_packet = av_malloc(max_packet_size);
> + if (!s->last_frame_packet)
> return AVERROR(ENOMEM);
> - avctx->extradata_size = s->bytestream - s->bytestream_start;
> - memcpy(avctx->extradata, s->bytestream_start, s->bytestream -
s->bytestream_start);
> + } else if (s->last_frame) {
> + ret = ff_alloc_packet2(avctx, pkt, max_packet_size);
> + if (ret < 0)
> + return ret;
>
> - s->bytestream = s->bytestream_start;
> + memcpy(pkt->data, s->last_frame_packet,
s->last_frame_packet_size);
> + pkt->size = s->last_frame_packet_size;
> + pkt->pts = pkt->dts = s->last_frame->pts;
> }
>
> - AV_WB32(buf, s->sequence_number);
> - AV_WB32(buf + 4, avctx->width);
> - AV_WB32(buf + 8, avctx->height);
> - AV_WB32(buf + 12, 0); // x offset
> - AV_WB32(buf + 16, 0); // y offset
> - AV_WB16(buf + 20, 0); // delay numerator (filled in during muxing)
> - AV_WB16(buf + 22, 0); // delay denominator
> - buf[24] = APNG_DISPOSE_OP_BACKGROUND;
> - buf[25] = APNG_BLEND_OP_SOURCE;
> - png_write_chunk(&s->bytestream, MKTAG('f', 'c', 'T', 'L'), buf, 26);
> - ++s->sequence_number;
> + if (pict) {
> + s->bytestream_start =
> + s->bytestream = s->last_frame_packet;
> + s->bytestream_end = s->bytestream + max_packet_size;
>
> - ret = encode_frame(avctx, pict);
> - if (ret < 0)
> - return ret;
> + // We're encoding the frame first, so we have to do a bit of
shuffling around
> + // to have the image data write to the correct place in the
buffer
> + fctl_chunk.sequence_number = s->sequence_number;
> + ++s->sequence_number;
> + s->bytestream += 26 + 12;
>
> - pkt->size = s->bytestream - s->bytestream_start;
> - pkt->flags |= AV_PKT_FLAG_KEY;
> - *got_packet = 1;
> + ret = apng_encode_frame(avctx, pict, &fctl_chunk,
&s->last_frame_fctl);
> + if (ret < 0)
> + return ret;
> +
> + fctl_chunk.delay_num = 0; // delay filled in during muxing
> + fctl_chunk.delay_den = 0;
> + } else {
> + s->last_frame_fctl.dispose_op = APNG_DISPOSE_OP_NONE;
> + }
> +
> + if (s->last_frame) {
> + uint8_t* last_fctl_chunk_start = pkt->data;
> + uint8_t buf[26];
> +
> + AV_WB32(buf + 0, s->last_frame_fctl.sequence_number);
> + AV_WB32(buf + 4, s->last_frame_fctl.width);
> + AV_WB32(buf + 8, s->last_frame_fctl.height);
> + AV_WB32(buf + 12, s->last_frame_fctl.x_offset);
> + AV_WB32(buf + 16, s->last_frame_fctl.y_offset);
> + AV_WB16(buf + 20, s->last_frame_fctl.delay_num);
> + AV_WB16(buf + 22, s->last_frame_fctl.delay_den);
> + buf[24] = s->last_frame_fctl.dispose_op;
> + buf[25] = s->last_frame_fctl.blend_op;
> + png_write_chunk(&last_fctl_chunk_start, MKTAG('f', 'c', 'T',
'L'), buf, 26);
> +
> + *got_packet = 1;
> + }
> +
> + if (pict) {
> + if (!s->last_frame) {
> + s->last_frame = av_frame_alloc();
> + if (!s->last_frame)
> + return AVERROR(ENOMEM);
> + } else if (s->last_frame_fctl.dispose_op !=
APNG_DISPOSE_OP_PREVIOUS) {
> + if (!s->prev_frame) {
> + s->prev_frame = av_frame_alloc();
> + if (!s->prev_frame)
> + return AVERROR(ENOMEM);
> +
> + s->prev_frame->format = pict->format;
> + s->prev_frame->width = pict->width;
> + s->prev_frame->height = pict->height;
> + if ((ret = av_frame_get_buffer(s->prev_frame, 32)) < 0)
> + return ret;
> + }
> +
> + // Do disposal, but not blending
> + memcpy(s->prev_frame->data[0], s->last_frame->data[0],
> + s->last_frame->linesize[0] * s->last_frame->height);
> + if (s->last_frame_fctl.dispose_op ==
APNG_DISPOSE_OP_BACKGROUND) {
> + uint32_t y;
> + uint8_t bpp = (s->bits_per_pixel + 7) >> 3;
> + for (y = s->last_frame_fctl.y_offset; y <
s->last_frame_fctl.y_offset + s->last_frame_fctl.height; ++y) {
> + size_t row_start = s->last_frame->linesize[0] * y +
bpp * s->last_frame_fctl.x_offset;
> + memset(s->prev_frame->data[0] + row_start, 0, bpp *
s->last_frame_fctl.width);
> + }
> + }
> + }
> +
> + av_frame_unref(s->last_frame);
> + ret = av_frame_ref(s->last_frame, (AVFrame*)pict);
> + if (ret < 0)
> + return ret;
> +
> + s->last_frame_fctl = fctl_chunk;
> + s->last_frame_packet_size = s->bytestream - s->bytestream_start;
> + } else {
> + av_frame_free(&s->last_frame);
> + }
>
> return 0;
> }
> @@ -714,6 +1061,9 @@ static av_cold int png_enc_close(AVCodecContext
*avctx)
>
> deflateEnd(&s->zstream);
> av_frame_free(&avctx->coded_frame);
> + av_frame_free(&s->last_frame);
> + av_frame_free(&s->prev_frame);
> + av_freep(&s->last_frame_packet);
> return 0;
> }
>
> @@ -769,6 +1119,7 @@ AVCodec ff_apng_encoder = {
> .init = png_enc_init,
> .close = png_enc_close,
> .encode2 = encode_apng,
> + .capabilities = CODEC_CAP_DELAY,
> .pix_fmts = (const enum AVPixelFormat[]) {
> AV_PIX_FMT_RGB24, AV_PIX_FMT_RGBA,
> AV_PIX_FMT_RGB48BE, AV_PIX_FMT_RGBA64BE,
> --
> 2.4.5
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
More information about the ffmpeg-devel mailing list