[FFmpeg-devel] [PATCH 5/5] [RFC] libavcodec/ffv1enc: Support storing LSB raw

Thu Oct 10 23:45:28 EEST 2024

This makes encoding of a 16-bit RGB raw sample 25% faster, at a 2% loss of compression, with rawlsb=4.

Please test and comment, especially if you are an archivist who cares about compression and speed.
I'd like to know whether this direction (that is, trading compression against speed)
is wanted.

Note: this only implements the encoder side; you cannot decode this at the moment. It's only for testing
compression and speed.

Signed-off-by: Michael Niedermayer <michael at niedermayer.cc>
---
 libavcodec/ffv1.h             |   1 +
 libavcodec/ffv1enc.c          | 135 +++++++++++++++++++++++++++++++---
 libavcodec/ffv1enc_template.c |   8 ++
 3 files changed, 134 insertions(+), 10 deletions(-)

diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
index b98f0b36855..0a8790fdb1b 100644
--- a/libavcodec/ffv1.h
+++ b/libavcodec/ffv1.h
@@ -136,6 +136,7 @@ typedef struct FFV1Context {
     int intra;
     int key_frame_ok;
     int context_model;
+    int rawlsb;
 
     int bits_per_raw_sample;
     int packed_at_lsb;
diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c
index 30d8073c8d4..ef139d0f4e7 100644
--- a/libavcodec/ffv1enc.c
+++ b/libavcodec/ffv1enc.c
@@ -290,17 +290,17 @@ static int encode_plane(FFV1Context *f, FFV1SliceContext *sc,
         sample[1][ w]= sample[1][w-1];
         if (f->bits_per_raw_sample <= 8) {
             for (x = 0; x < w; x++)
-                sample[0][x] = src[x * pixel_stride + stride * y];
+                sample[0][x] = src[x * pixel_stride + stride * y] >> f->rawlsb;
             if((ret = encode_line(f, sc, f->avctx, w, sample, plane_index, 8, ac, pass1)) < 0)
                 return ret;
         } else {
             if (f->packed_at_lsb) {
                 for (x = 0; x < w; x++) {
-                    sample[0][x] = ((uint16_t*)(src + stride*y))[x];
+                    sample[0][x] = ((uint16_t*)(src + stride*y))[x] >> f->rawlsb;
                 }
             } else {
                 for (x = 0; x < w; x++) {
-                    sample[0][x] = ((uint16_t*)(src + stride*y))[x] >> (16 - f->bits_per_raw_sample);
+                    sample[0][x] = ((uint16_t*)(src + stride*y))[x] >> (16 - f->bits_per_raw_sample + f->rawlsb);
                 }
             }
             if((ret = encode_line(f, sc, f->avctx, w, sample, plane_index, f->bits_per_raw_sample, ac, pass1)) < 0)
@@ -310,6 +310,82 @@ static int encode_plane(FFV1Context *f, FFV1SliceContext *sc,
     return 0;
 }
 
+static int encode_plane_rawlsb(FFV1Context *f, FFV1SliceContext *sc,
+                            const uint8_t *src, int w, int h,
+                            int stride, int plane_index, int pixel_stride)
+{ /* For every sample of a w x h plane, passes its f->rawlsb lowest bits to put_bits() on sc->pb; always returns 0. plane_index is not used in this body. */
+    int x, y;
+    unsigned masklsb = (1 << f->rawlsb) - 1; /* mask with the low f->rawlsb bits set */
+    PutBitContext *pb = &sc->pb;
+
+    for (y = 0; y < h; y++) { /* NOTE(review): nothing here checks that pb has room for w*h*rawlsb bits - confirm the caller guarantees it */
+        if (f->bits_per_raw_sample <= 8) { /* samples are read as single bytes, pixel_stride bytes apart */
+            for (x = 0; x < w; x++)
+                put_bits(pb, f->rawlsb, src[x * pixel_stride + stride * y] & masklsb);
+        } else { /* samples are read as consecutive uint16_t words; pixel_stride is ignored on this path */
+            if (f->packed_at_lsb) { /* word is masked directly, without any shift */
+                for (x = 0; x < w; x++)
+                    put_bits(pb, f->rawlsb, ((uint16_t*)(src + stride*y))[x] & masklsb);
+            } else { /* word is first shifted right by (16 - bits_per_raw_sample), then masked */
+                for (x = 0; x < w; x++)
+                    put_bits(pb, f->rawlsb, (((uint16_t*)(src + stride*y))[x] >> (16 - f->bits_per_raw_sample)) & masklsb);
+            }
+        }
+    }
+    return 0;
+}
+
+static int encode_rgb_frame_rawlsb(FFV1Context *f, FFV1SliceContext *sc,
+                                    const uint8_t *src[4],
+                                    int w, int h, const int stride[4])
+{ /* For every pixel of a w x h RGB(A) area, passes the f->rawlsb lowest bits of each component to put_bits() on sc->pb; always returns 0. */
+    unsigned masklsb = (1 << f->rawlsb) - 1; /* mask with the low f->rawlsb bits set */
+    PutBitContext *pb = &sc->pb;
+    int x, y;
+    int lbd    = f->bits_per_raw_sample <= 8; /* true when samples have at most 8 bits */
+    int packed = !src[1]; /* no second plane pointer: components are read interleaved from src[0] */
+    int transparency = f->transparency;
+    int packed_size = (3 + transparency)*2; /* bytes per interleaved pixel at 2 bytes per component - assumes transparency is 0 or 1, TODO confirm */
+
+    for (y = 0; y < h; y++) {
+        for (x = 0; x < w; x++) {
+            int b, g, r, av_uninit(a); /* a is assigned only when lbd or transparency is set, and read only when transparency is set */
+            if (lbd) { /* one 32-bit word per pixel: b in bits 0-7, g in 8-15, r in 16-23, a in 24-31 */
+                unsigned v = *((const uint32_t*)(src[0] + x*4 + stride[0]*y));
+                b =  v        & 0xFF;
+                g = (v >>  8) & 0xFF;
+                r = (v >> 16) & 0xFF;
+                a =  v >> 24;
+            } else if (packed) { /* interleaved 16-bit components in r, g, b[, a] order */
+                const uint16_t *p = ((const uint16_t*)(src[0] + x*packed_size + stride[0]*y));
+                r = p[0];
+                g = p[1];
+                b = p[2];
+                if (transparency)
+                  a = p[3];
+            } else if (sizeof(TYPE) == 4 || transparency) { /* NOTE(review): TYPE is not defined anywhere in this hunk - confirm it is defined at this point in ffv1enc.c and has the intended value */
+                g = *((const uint16_t *)(src[0] + x*2 + stride[0]*y)); /* planar 16-bit: planes 0..3 are read as g, b, r, a */
+                b = *((const uint16_t *)(src[1] + x*2 + stride[1]*y));
+                r = *((const uint16_t *)(src[2] + x*2 + stride[2]*y));
+                if (transparency)
+                    a = *((const uint16_t *)(src[3] + x*2 + stride[3]*y));
+            } else { /* planar 16-bit: planes 0..2 are read as b, g, r */
+                b = *((const uint16_t *)(src[0] + x*2 + stride[0]*y));
+                g = *((const uint16_t *)(src[1] + x*2 + stride[1]*y));
+                r = *((const uint16_t *)(src[2] + x*2 + stride[2]*y));
+            }
+
+            put_bits(pb, f->rawlsb, r & masklsb); /* per pixel the raw bits are emitted in r, g, b[, a] order */
+            put_bits(pb, f->rawlsb, g & masklsb);
+            put_bits(pb, f->rawlsb, b & masklsb);
+            if (transparency)
+                put_bits(pb, f->rawlsb, a & masklsb);
+        }
+    }
+    return 0;
+}
+
+
 static void write_quant_table(RangeCoder *c, int16_t *quant_table)
 {
     int last = 0;
@@ -564,6 +640,9 @@ static av_cold int encode_init(AVCodecContext *avctx)
     if (s->ec == 2)
         s->version = FFMAX(s->version, 4);
 
+    if (s->rawlsb)
+        s->version = FFMAX(s->version, 4);
+
     if ((s->version == 2 || s->version>3) && avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
         av_log(avctx, AV_LOG_ERROR, "Version 2 or 4 needed for requested features but version 2 or 4 is experimental and not enabled\n");
         return AVERROR_INVALIDDATA;
@@ -716,6 +795,11 @@ static av_cold int encode_init(AVCodecContext *avctx)
         }
     }
 
+    if (s->rawlsb > s->bits_per_raw_sample) {
+        av_log(avctx, AV_LOG_ERROR, "too many raw lsb\n");
+        return AVERROR(EINVAL);
+    }
+
     if (s->ac == AC_RANGE_CUSTOM_TAB) {
         for (i = 1; i < 256; i++)
             s->state_transition[i] = ver2_state[i];
@@ -958,6 +1042,7 @@ static void encode_slice_header(FFV1Context *f, FFV1SliceContext *sc)
             put_symbol(c, state, sc->slice_rct_by_coef, 0);
             put_symbol(c, state, sc->slice_rct_ry_coef, 0);
         }
+//         put_symbol(c, state, f->rawlsb, 0);
     }
 }
 
@@ -1113,13 +1198,6 @@ retry:
         ret = encode_rgb_frame(f, sc, planes, width, height, p->linesize);
     }
 
-    if (f->ac != AC_GOLOMB_RICE) {
-        sc->ac_byte_count = ff_rac_terminate(&sc->c, 1);
-    } else {
-        flush_put_bits(&sc->pb); // FIXME: nicer padding
-        sc->ac_byte_count += put_bytes_output(&sc->pb);
-    }
-
     if (ret < 0) {
         av_assert0(sc->slice_coding_mode == 0);
         if (f->version < 4 || !f->ac) {
@@ -1132,6 +1210,41 @@ retry:
         goto retry;
     }
 
+    if (f->ac != AC_GOLOMB_RICE) {
+        sc->ac_byte_count = ff_rac_terminate(&sc->c, 1);
+        init_put_bits(&sc->pb,
+                      sc->c.bytestream_start + sc->ac_byte_count,
+                      sc->c.bytestream_end - sc->c.bytestream_start - sc->ac_byte_count);
+    }
+
+    if (f->rawlsb) {
+        if (f->colorspace == 0 && c->pix_fmt != AV_PIX_FMT_YA8) {
+            const int chroma_width  = AV_CEIL_RSHIFT(width,  f->chroma_h_shift);
+            const int chroma_height = AV_CEIL_RSHIFT(height, f->chroma_v_shift);
+            const int cx            = x >> f->chroma_h_shift;
+            const int cy            = y >> f->chroma_v_shift;
+
+            encode_plane_rawlsb(f, sc, p->data[0] + ps*x + y*p->linesize[0], width, height, p->linesize[0], 0, 1);
+
+            if (f->chroma_planes) {
+                encode_plane_rawlsb(f, sc, p->data[1] + ps*cx+cy*p->linesize[1], chroma_width, chroma_height, p->linesize[1], 1, 1);
+                encode_plane_rawlsb(f, sc, p->data[2] + ps*cx+cy*p->linesize[2], chroma_width, chroma_height, p->linesize[2], 1, 1);
+            }
+            if (f->transparency)
+                encode_plane_rawlsb(f, sc, p->data[3] + ps*x + y*p->linesize[3], width, height, p->linesize[3], 2, 1);
+        } else if (c->pix_fmt == AV_PIX_FMT_YA8) {
+            encode_plane_rawlsb(f, sc, p->data[0] +     ps*x + y*p->linesize[0], width, height, p->linesize[0], 0, 2);
+            encode_plane_rawlsb(f, sc, p->data[0] + 1 + ps*x + y*p->linesize[0], width, height, p->linesize[0], 1, 2);
+        } else {
+            encode_rgb_frame_rawlsb(f, sc, planes, width, height, p->linesize);
+        }
+    }
+
+    if (f->ac == AC_GOLOMB_RICE || f->rawlsb) {
+        flush_put_bits(&sc->pb); // FIXME: nicer padding
+        sc->ac_byte_count += put_bytes_output(&sc->pb);
+    }
+
     return 0;
 }
 
@@ -1289,6 +1402,8 @@ static const AVOption options[] = {
             { .i64 = 1 }, INT_MIN, INT_MAX, VE, .unit = "coder" },
     { "context", "Context model", OFFSET(context_model), AV_OPT_TYPE_INT,
             { .i64 = 0 }, 0, 1, VE },
+    { "rawlsb", "number of LSBs stored RAW", OFFSET(rawlsb), AV_OPT_TYPE_INT,
+            { .i64 = 0 }, 0, 16, VE },
 
     { NULL }
 };
diff --git a/libavcodec/ffv1enc_template.c b/libavcodec/ffv1enc_template.c
index bc14926ab95..6e0ae10a15d 100644
--- a/libavcodec/ffv1enc_template.c
+++ b/libavcodec/ffv1enc_template.c
@@ -180,6 +180,14 @@ static int RENAME(encode_rgb_frame)(FFV1Context *f, FFV1SliceContext *sc,
                 r = *((const uint16_t *)(src[2] + x*2 + stride[2]*y));
             }
 
+            if (f->rawlsb) {
+                r >>= f->rawlsb;
+                g >>= f->rawlsb;
+                b >>= f->rawlsb;
+                if (transparency)
+                    a >>= f->rawlsb;
+            }
+
             if (sc->slice_coding_mode != 1) {
                 b -= g;
                 r -= g;
-- 
2.47.0