[FFmpeg-devel] [PATCH] MS Video 1 encoder, take 2
Kostya
kostya.shishkov
Fri Mar 13 17:34:21 CET 2009
On Wed, Mar 11, 2009 at 08:37:00PM +0100, Michael Niedermayer wrote:
> On Wed, Mar 11, 2009 at 08:11:39AM +0200, Kostya wrote:
> > $subj
> >
> > It is a quality-based encoder, since this codec is not suited to bitrate-based encoding,
> > so it should be run as ffmpeg -i infile -vcodec msvideo1 -qscale 3 output.avi
>
[...]
>
> > +/**
> > + * Encoder context
> > + */
> > +typedef struct Msvideo1EncContext {
> > + AVCodecContext *avctx;
> > + AVFrame pic;
> > + AVLFG rnd;
> > + uint8_t *prev;
> > +
> > + int block[16*3];
> > + int block2[16*3];
> > + int codebook[8*3];
> > + int codebook2[8*3];
> > + int output[16*3];
> > + int output2[16*3];
> > + int avg[3];
>
> I don't think all of that belongs in the context; some of these are clearly local variables
moved
> [...]
> > + for(i = 0; i < 4*4*3; i++){
> > + int t = prevptr[i] - c->block[i];
> > + bestscore += t*t;
> > + }
> > + if(!skips)
> > + bestscore += 2;
>
> this is not a correct method of combining rate and distortion
Ok, now I don't mix them at all.
> rest not reviewed, this is a grave error and has to be fixed first
Can you recommend any good book on such matters? I really need it.
> [...]
> --
> Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
-------------- next part --------------
Index: Changelog
===================================================================
--- Changelog (revision 17880)
+++ Changelog (working copy)
@@ -4,6 +4,7 @@
- deprecated vhook subsystem removed
- deprecated old scaler removed
- VQF demuxer
+- MS Video 1 15-bpp encoder
Index: libavcodec/Makefile
===================================================================
--- libavcodec/Makefile (revision 17880)
+++ libavcodec/Makefile (working copy)
@@ -152,6 +152,7 @@
OBJS-$(CONFIG_MSMPEG4V3_ENCODER) += msmpeg4.o msmpeg4data.o mpegvideo_enc.o motion_est.o ratecontrol.o h263.o mpeg12data.o mpegvideo.o error_resilience.o
OBJS-$(CONFIG_MSRLE_DECODER) += msrle.o msrledec.o
OBJS-$(CONFIG_MSVIDEO1_DECODER) += msvideo1.o
+OBJS-$(CONFIG_MSVIDEO1_ENCODER) += msvideo1enc.o elbg.o
OBJS-$(CONFIG_MSZH_DECODER) += lcldec.o
OBJS-$(CONFIG_NELLYMOSER_DECODER) += nellymoserdec.o nellymoser.o
OBJS-$(CONFIG_NELLYMOSER_ENCODER) += nellymoserenc.o nellymoser.o
Index: libavcodec/msvideo1enc.c
===================================================================
--- libavcodec/msvideo1enc.c (revision 0)
+++ libavcodec/msvideo1enc.c (revision 0)
@@ -0,0 +1,361 @@
+/*
+ * Microsoft Video-1 encoder
+ * Copyright (c) 2009 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libavcodec/msvideo1enc.c
+ * Microsoft Video-1 encoder
+ */
+
+#include "avcodec.h"
+#include "bytestream.h"
+#include "libavutil/lfg.h"
+#include "elbg.h"
+
+/**
+ * Encoder context
+ */
+typedef struct Msvideo1EncContext {
+ AVCodecContext *avctx; ///< owning codec context, stored by encode_init()
+ AVFrame pic; ///< per-call copy of the input frame; also published as avctx->coded_frame
+ AVLFG rnd; ///< random generator state handed to the ELBG routines, seeded in encode_init()
+ uint8_t *prev; ///< reconstructed previous frame: 48 bytes (16 pixels x 3 components) per 4x4 block, in the order blocks are visited
+ int keyint; ///< non-key frames coded since the last keyframe; a keyframe is forced once it reaches avctx->keyint_min
+} Msvideo1EncContext;
+
+enum MSV1Mode{
+ MODE_SKIP = 0, ///< nothing is coded for the block and prev is left untouched; a run of skips shares one 2-byte code
+ MODE_FILL, ///< whole block painted with one colour (2 bytes)
+ MODE_2COL, ///< two colours for the whole block plus a 16-bit selector mask (6 bytes)
+ MODE_8COL, ///< two colours per 2x2 quadrant plus a 16-bit selector mask (18 bytes)
+
+ NB_MODES ///< number of coding modes, sizes the per-mode distortion array
+};
+
+/** Value OR-ed with the run length to form the 16-bit code for a run of skipped blocks. */
+#define SKIP_PREFIX 0x8400
+/** Longest run of skipped blocks that a single skip code is allowed to carry. */
+#define SKIPS_MAX 0x03FF
+/**
+ * Pack the three consecutive components in[off], in[off + 1], in[off + 2]
+ * into one 15-bit value, first component in the top five bits.
+ * The components are expected to be 5-bit values already; nothing is masked.
+ * Both arguments are parenthesized so that expressions such as
+ * MKRGB555(ptr + 1, 0) expand correctly.
+ */
+#define MKRGB555(in, off) (((in)[(off)] << 10) | ((in)[(off) + 1] << 5) | ((in)[(off) + 2]))
+
+/**
+ * Maps a raster position inside a 4x4 block to its position in a layout
+ * where the four pixels of each 2x2 quadrant are stored consecutively.
+ */
+static const uint8_t remap_8col[16] = { 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15 };
+
+/**
+ * Distortion of leaving a block unchanged.
+ *
+ * @param c    encoder context (not used here)
+ * @param prev stored previous version of the block, 4*4*3 components
+ * @param src  current block, 4*4*3 components
+ * @return sum of squared differences between prev and src
+ */
+static inline int calc_skip(Msvideo1EncContext *c, uint8_t *prev, int *src)
+{
+    const uint8_t *old = prev;
+    const int *cur = src;
+    const int *const end = src + 4*4*3;
+    int sse = 0;
+
+    while (cur < end) {
+        const int diff = *old++ - *cur++;
+        sse += diff * diff;
+    }
+    return sse;
+}
+
+/**
+ * Distortion of painting the whole block with a single colour.
+ *
+ * @param c    encoder context (not used here)
+ * @param prev previous version of the block (not used here)
+ * @param src  current block, 16 pixels of 3 components each
+ * @param avg  receives the fill colour: the rounded mean of every component
+ * @return sum of squared differences between the fill colour and src
+ */
+static inline int calc_fill(Msvideo1EncContext *c, uint8_t *prev, int *src,
+                            int *avg)
+{
+    int sse = 0;
+    int pix, comp;
+
+    /* rounded mean of each component over the 16 pixels */
+    for (comp = 0; comp < 3; comp++) {
+        int sum = 0;
+
+        for (pix = 0; pix < 4*4; pix++)
+            sum += src[pix*3 + comp];
+        avg[comp] = (sum + 8) >> 4;
+    }
+    /* a first component of 1 would be written as a skip code, so avoid it */
+    if (avg[0] == 1)
+        avg[0] = 0;
+
+    for (pix = 0; pix < 4*4; pix++) {
+        for (comp = 0; comp < 3; comp++) {
+            const int diff = avg[comp] - src[pix*3 + comp];
+            sse += diff * diff;
+        }
+    }
+    return sse;
+}
+
+/**
+ * Distortion of coding the whole block with two colours.
+ *
+ * The 16 pixels are quantized to a 2-entry codebook with ELBG.
+ *
+ * @param c        encoder context, supplies the random generator state
+ * @param prev     previous version of the block (not used here)
+ * @param src      current block in raster order, 16 pixels of 3 components
+ * @param codebook receives the two colours in its first 2*3 entries
+ * @param output   receives, per pixel, the index (0 or 1) of the chosen colour
+ * @return sum of squared differences between the quantized block and src
+ */
+static inline int calc_quant2(Msvideo1EncContext *c, uint8_t *prev, int *src,
+                              int codebook[8*3], int output[16])
+{
+    int score = 0;
+    int i, j;
+
+    ff_init_elbg(src, 3, 16, codebook, 2, 1, output, &c->rnd);
+    ff_do_elbg  (src, 3, 16, codebook, 2, 1, output, &c->rnd);
+    // last output value should be always 1, swap codebooks if needed
+    // (encode_block() stores output[i] ^ 1 in bit i of the mask, so this keeps
+    // bit 15 of the mask clear)
+    if (!output[15]) {
+        for (i = 0; i < 3; i++)
+            FFSWAP(int, codebook[i], codebook[i+3]); // entries are int, swap them as int
+        for (i = 0; i < 16; i++)
+            output[i] ^= 1;
+    }
+    for (i = 0; i < 4*4; i++) {
+        for (j = 0; j < 3; j++) {
+            int t = codebook[output[i]*3 + j] - src[i*3+j];
+            score += t*t;
+        }
+    }
+    return score;
+}
+
+/**
+ * Distortion of coding the block with two colours per 2x2 quadrant.
+ *
+ * Each group of four consecutive pixels in src is quantized separately to
+ * its own 2-entry codebook with ELBG.
+ *
+ * @param c        encoder context, supplies the random generator state
+ * @param prev     previous version of the block (not used here)
+ * @param src      current block in quadrant order, i.e. pixel i of the raster
+ *                 block is stored at position remap_8col[i]
+ * @param codebook receives 4 pairs of colours, one pair per quadrant
+ * @param output   receives, per pixel and in the same order as src, the
+ *                 index (0 or 1) into its quadrant's colour pair
+ * @return sum of squared differences between the quantized block and src
+ */
+static inline int calc_quant8(Msvideo1EncContext *c, uint8_t *prev, int *src,
+                              int codebook[8*3], int output[16])
+{
+    int score = 0;
+    int i, j;
+
+    for (i = 0; i < 4; i++) {
+        ff_init_elbg(src + i*4*3, 3, 4, codebook + i*2*3, 2, 1, output + i*4, &c->rnd);
+        ff_do_elbg  (src + i*4*3, 3, 4, codebook + i*2*3, 2, 1, output + i*4, &c->rnd);
+    }
+    // last value should be always 1, swap codebooks if needed
+    // (only the last quadrant is touched: its colour pair and its four indices)
+    if (!output[15]) {
+        for (i = 0; i < 3; i++)
+            FFSWAP(int, codebook[i+18], codebook[i+21]); // entries are int, swap them as int
+        for (i = 12; i < 16; i++)
+            output[i] ^= 1;
+    }
+    /* i walks the block in raster order; (i&2) + ((i&8)>>1) is the offset of
+     * the quadrant's colour pair. Both output and src are quadrant-ordered,
+     * so both must be addressed through remap_8col. */
+    for (i = 0; i < 4*4; i++) {
+        for (j = 0; j < 3; j++) {
+            int t = codebook[(output[remap_8col[i]] + (i&2) + ((i&8)>>1))*3+j]
+                    - src[remap_8col[i]*3+j];
+            score += t*t;
+        }
+    }
+    return score;
+}
+
+/**
+ * Write the code for one block to the output.
+ *
+ * MODE_FILL writes one colour with bit 15 set. MODE_2COL writes a 16-bit
+ * mask followed by two colours. MODE_8COL writes a 16-bit mask followed by
+ * eight colours, the first of them with bit 15 set. Any other mode writes
+ * nothing. Bit i of a mask is the inverted colour index of raster pixel i.
+ *
+ * @param mode          coding mode of the block
+ * @param dst           output position, advanced past the written bytes
+ * @param avg           fill colour, read for MODE_FILL
+ * @param codebook_2col colours, read for MODE_2COL
+ * @param output_2col   per-pixel colour indices, read for MODE_2COL
+ * @param codebook_8col colours, read for MODE_8COL
+ * @param output_8col   quadrant-ordered colour indices, read for MODE_8COL
+ */
+static void encode_block(int mode, uint8_t **dst,
+                         int *avg,
+                         int codebook_2col[8*3], int output_2col[16],
+                         int codebook_8col[8*3], int output_8col[16])
+{
+    int mask = 0;
+    int bit, col;
+
+    if (mode == MODE_FILL) {
+        bytestream_put_le16(dst, MKRGB555(avg, 0) | 0x8000);
+    } else if (mode == MODE_2COL) {
+        for (bit = 0; bit < 4*4; bit++)
+            mask |= (output_2col[bit] ^ 1) << bit;
+        bytestream_put_le16(dst, mask);
+        for (col = 0; col < 2; col++)
+            bytestream_put_le16(dst, MKRGB555(codebook_2col, col*3));
+    } else if (mode == MODE_8COL) {
+        for (bit = 0; bit < 4*4; bit++)
+            mask |= (output_8col[remap_8col[bit]] ^ 1) << bit;
+        bytestream_put_le16(dst, mask);
+        /* only the first of the eight colours carries the 0x8000 marker */
+        for (col = 0; col < 8; col++)
+            bytestream_put_le16(dst, MKRGB555(codebook_8col, col*3) | (col ? 0 : 0x8000));
+    }
+}
+
+/**
+ * Store the reconstruction of a just-coded block into the previous-frame
+ * buffer, in raster order. Modes other than MODE_FILL, MODE_2COL and
+ * MODE_8COL leave the buffer untouched.
+ *
+ * @param mode          coding mode of the block
+ * @param prev          destination, 16 pixels of 3 components
+ * @param avg           fill colour, read for MODE_FILL
+ * @param codebook_2col colours, read for MODE_2COL
+ * @param output_2col   per-pixel colour indices, read for MODE_2COL
+ * @param codebook_8col colours, read for MODE_8COL
+ * @param output_8col   quadrant-ordered colour indices, read for MODE_8COL
+ */
+static void update_prev(int mode, uint8_t *prev, int *avg,
+                        int codebook_2col[8*3], int output_2col[16],
+                        int codebook_8col[8*3], int output_8col[16])
+{
+    int pix, comp;
+
+    if (mode != MODE_FILL && mode != MODE_2COL && mode != MODE_8COL)
+        return;
+
+    for (pix = 0; pix < 4*4; pix++) {
+        const int *colour;
+
+        if (mode == MODE_FILL) {
+            colour = avg;
+        } else if (mode == MODE_2COL) {
+            colour = codebook_2col + output_2col[pix]*3;
+        } else {
+            /* offset of the colour pair that belongs to this pixel's quadrant */
+            const int quad = (pix & 2) + ((pix & 8) >> 1);
+
+            colour = codebook_8col + (output_8col[remap_8col[pix]] + quad)*3;
+        }
+        for (comp = 0; comp < 3; comp++)
+            prev[pix*3 + comp] = colour[comp];
+    }
+}
+
+/**
+ * Encode one picture.
+ *
+ * The picture is cut into 4x4 blocks. For every block the distortion of each
+ * coding mode is measured, and the first mode (in MSV1Mode order) whose
+ * distortion is below the quality value is used; MODE_8COL is the fallback.
+ * Runs of skipped blocks are merged into single skip codes, and a zero word
+ * terminates the frame.
+ *
+ * @param avctx    codec context; priv_data holds the Msvideo1EncContext
+ * @param buf      output buffer
+ * @param buf_size size of buf in bytes
+ * @param data     input AVFrame holding 16-bit pixels with three 5-bit components
+ * @return number of bytes written to buf, or -1 if buf is too small
+ */
+static int encode_frame(AVCodecContext *avctx, uint8_t *buf, int buf_size, void *data)
+{
+    Msvideo1EncContext * const c = avctx->priv_data;
+    AVFrame *pict = data;
+    AVFrame * const p = &c->pic;
+    /* one block per started group of four pixels in each direction */
+    const int nb_blocks = ((avctx->width + 3) >> 2) * ((avctx->height + 3) >> 2);
+    const uint16_t *src;
+    uint8_t *prevptr;
+    uint8_t *dst = buf;
+    int keyframe = 0;
+    int no_skips = 1;
+    int i, j, k, x, y;
+    int skips = 0;
+    int quality;
+
+    int block[16*3];
+    int block_8col[16*3];
+    int avg[3];
+    int codebook_2col[8*3];
+    int codebook_8col[8*3];
+    int output_2col[16];
+    int output_8col[16];
+    int dists[NB_MODES];
+
+    /* The writers below do no bounds checking, so reject buffers that cannot
+     * hold the worst case: 18 bytes for every block plus the 2-byte end
+     * marker. A 2-byte skip code always stands for at least one block that
+     * produced no other output, so it never exceeds that bound. */
+    if (buf_size < nb_blocks * 18 + 2) {
+        av_log(avctx, AV_LOG_ERROR, "Output buffer is too small\n");
+        return -1;
+    }
+
+    *p = *pict;
+    prevptr = c->prev;
+    /* Start at the last row of the height rounded up to a multiple of 4 and
+     * walk upwards, one block row at a time.
+     * NOTE(review): for heights that are not a multiple of 4 this row lies
+     * below avctx->height - 1 - confirm the input frame is padded. */
+    src = (uint16_t*)(p->data[0] + p->linesize[0]*(((avctx->height + 3)&~3) - 1));
+    if (c->keyint >= avctx->keyint_min)
+        keyframe = 1;
+
+    p->quality = avctx->global_quality;
+    quality = p->quality;
+
+    for (y = 0; y < avctx->height; y += 4) {
+        for (x = 0; x < avctx->width; x += 4) {
+            int bestmode;
+
+            /* split the 16 pixels into components, once in raster order and
+             * once in quadrant order for the 8-colour mode */
+            for (j = 0; j < 4; j++) {
+                for (i = 0; i < 4; i++) {
+                    uint16_t val = src[x + i - j*p->linesize[0]/2];
+                    for (k = 0; k < 3; k++) {
+                        block     [(i + j*4)*3 + k]             = (val >> (10-k*5)) & 0x1F;
+                        block_8col[remap_8col[i + j*4]*3 + k]   = (val >> (10-k*5)) & 0x1F;
+                    }
+                }
+            }
+
+            /* skipping is impossible in a keyframe */
+            dists[MODE_SKIP] = keyframe ? INT_MAX : calc_skip(c, prevptr, block);
+            dists[MODE_FILL] = calc_fill  (c, prevptr, block, avg);
+            dists[MODE_2COL] = calc_quant2(c, prevptr, block, codebook_2col, output_2col);
+            dists[MODE_8COL] = calc_quant8(c, prevptr, block_8col, codebook_8col, output_8col);
+
+            /* For now, first mode with distortion lower than the limit set by quality
+             * is chosen. Since modes arranged by ascending quality with high threshold
+             * low-quality (and coded with lower number of bits) modes are picked first.
+             *
+             * TODO: replace it with something better
+             */
+            for (i = 0; i < NB_MODES - 1; i++)
+                if (dists[i] < quality)
+                    break;
+
+            bestmode = i;
+
+            if (bestmode == MODE_SKIP) {
+                skips++;
+                no_skips = 0;
+            }
+            /* flush the pending run before a coded block or when it is full */
+            if ((bestmode != MODE_SKIP && skips) || skips == SKIPS_MAX) {
+                bytestream_put_le16(&dst, skips | SKIP_PREFIX);
+                skips = 0;
+            }
+
+            /* coding costs:
+             * first skip in a row - 2 bytes
+             * consequent skips - 0 bytes
+             * one-color fill - 2 bytes
+             * 2-color fill - 6 bytes
+             * 8-color fill - 18 bytes
+             */
+            encode_block(bestmode, &dst, avg, codebook_2col, output_2col,
+                         codebook_8col, output_8col);
+            update_prev (bestmode, prevptr, avg, codebook_2col, output_2col,
+                         codebook_8col, output_8col);
+
+            prevptr += 4*4*3;
+        }
+        /* src counts 16-bit pixels: four rows are 4*linesize bytes = 2*linesize elements */
+        src -= p->linesize[0] << 1;
+    }
+    if (skips)
+        bytestream_put_le16(&dst, skips | SKIP_PREFIX);
+    //EOF
+    bytestream_put_le16(&dst, 0);
+
+    /* a frame in which no block was skipped is reported as a keyframe */
+    if (no_skips)
+        keyframe = 1;
+    if (keyframe)
+        c->keyint = 0;
+    else
+        c->keyint++;
+    p->pict_type = keyframe ? FF_I_TYPE : FF_P_TYPE;
+    p->key_frame = keyframe;
+
+    return dst - buf;
+}
+
+
+/**
+ * Initialize the encoder.
+ *
+ * Refuses to run unless CODEC_FLAG_QSCALE is set, validates the picture
+ * dimensions, seeds the random generator and allocates the buffer that
+ * keeps the reconstructed previous frame.
+ *
+ * @param avctx codec context; priv_data holds the Msvideo1EncContext
+ * @return 0 on success, -1 on failure
+ */
+static av_cold int encode_init(AVCodecContext *avctx)
+{
+    Msvideo1EncContext * const c = avctx->priv_data;
+
+    if (!(avctx->flags&CODEC_FLAG_QSCALE)) {
+        av_log(avctx, AV_LOG_ERROR, "This encoder works only with set quality, not bitrate\n");
+        return -1;
+    }
+
+    c->avctx = avctx;
+    if (avcodec_check_dimensions(avctx, avctx->width, avctx->height) < 0) {
+        return -1;
+    }
+
+    avctx->coded_frame = (AVFrame*)&c->pic;
+
+    /* start at the threshold so that the very first frame becomes a keyframe */
+    c->keyint = avctx->keyint_min;
+    av_lfg_init(&c->rnd, 0xDEADBEEF);
+
+    /* 3 bytes per pixel, both dimensions rounded up to whole 4x4 blocks */
+    c->prev = av_malloc(((avctx->width + 3) & ~3) * ((avctx->height + 3) & ~3) * 3);
+    if (!c->prev) {
+        av_log(avctx, AV_LOG_ERROR, "Cannot allocate buffer\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+
+/**
+ * Release what encode_init() allocated: the previous-frame buffer.
+ *
+ * @param avctx codec context; priv_data holds the Msvideo1EncContext
+ * @return always 0
+ */
+static av_cold int encode_end(AVCodecContext *avctx)
+{
+    Msvideo1EncContext *enc = avctx->priv_data;
+    uint8_t **history = &enc->prev;
+
+    av_freep(history);
+    return 0;
+}
+
+AVCodec msvideo1_encoder = { // codec descriptor; the positional entries follow AVCodec's field order, which is not visible here - verify against avcodec.h
+ "msvideo1", // short name, the one given to -vcodec
+ CODEC_TYPE_VIDEO,
+ CODEC_ID_MSVIDEO1,
+ sizeof(Msvideo1EncContext), // size of the private context
+ encode_init,
+ encode_frame,
+ encode_end,
+ .pix_fmts = (enum PixelFormat[]){PIX_FMT_RGB555, PIX_FMT_NONE}, // RGB555 is the only accepted input format
+ .long_name = NULL_IF_CONFIG_SMALL("Microsoft Video-1"),
+};
Index: libavcodec/allcodecs.c
===================================================================
--- libavcodec/allcodecs.c (revision 17880)
+++ libavcodec/allcodecs.c (working copy)
@@ -116,7 +116,7 @@
REGISTER_ENCDEC (MSMPEG4V2, msmpeg4v2);
REGISTER_ENCDEC (MSMPEG4V3, msmpeg4v3);
REGISTER_DECODER (MSRLE, msrle);
- REGISTER_DECODER (MSVIDEO1, msvideo1);
+ REGISTER_ENCDEC (MSVIDEO1, msvideo1);
REGISTER_DECODER (MSZH, mszh);
REGISTER_DECODER (NUV, nuv);
REGISTER_ENCODER (PAM, pam);
Index: doc/general.texi
===================================================================
--- doc/general.texi (revision 17880)
+++ doc/general.texi (working copy)
@@ -373,7 +373,7 @@
@item LOCO @tab @tab X
@item lossless MJPEG @tab X @tab X
@item Microsoft RLE @tab @tab X
-@item Microsoft Video 1 @tab @tab X
+@item Microsoft Video 1 @tab X @tab X
@item Mimic @tab @tab X
@tab Used in MSN Messenger Webcam streams.
@item Miro VideoXL @tab @tab X
More information about the ffmpeg-devel
mailing list