[FFmpeg-devel] [PATCH 1/2] (was Re: deduplicated [PATCH] Cinepak: speed up decoding several-fold...) formats.

u-9iep at aetey.se u-9iep at aetey.se
Sun Mar 5 20:26:30 EET 2017


For one week there was no substantial feedback on the patches.

I understand that the first patch was very large which makes it hard
to review.

That's why I now have produced a limited, minimalistic change which still
yields most of the improvement.

I kindly ask the reader to note that cinepak is not ffmpeg's everyday meal
i.e. fast shortcut judgements may be not applicable. Please take your time.

To avoid contention around some of the issues which triggered such
regrettable kind of judgements let me point out that

- letting a decoder produce multiple pixel formats is explicitly
  allowed and supported by the documented API (get_format())

- the patch does not introduce a new feature in this respect,
  just makes its presence more visible and much more useful

- the cinepak codec is extraordinary well suited to produce virtually
  any desired pixel format very efficiently (iow it is quite special)

- even though we have got a framework of libswscale, there is no point
  to use it where it can not help; it would be several times slower than
  cinepak itself, NOTE not by a coincidence but by the very design of both

- when some functionality is desired from libswscale, it remains fully
  usable with cinepak, with no loss in the actual efficiency of libswscale

- the 3-fold practical speedup makes the difference between working/useful
  and unusable/useless, with very little complexity (aka maintenance costs)

- the feedback concerning the coding style and code duplication was not
  specified in any measurable terms but it seems that all related issues
  have been resolved

Looking forward to your analysis and feedback.

Thanks.

Rune
-------------- next part --------------
>From cbe0664a13a615ecac302ffb0de577cd08b1f910 Mon Sep 17 00:00:00 2001
From: Rl <addr-see-the-website at aetey.se>
Date: Sun, 5 Mar 2017 15:54:06 +0100
Subject: [PATCH 1/2] Cinepak decoding: refactor for several-fold speedup

Refactored codebook generation and vector parsing
to better support the decoder API and allow for optimizations.

Decoding to rgb24 is now slightly faster.

Replaced generation of the constrained pal8 output
(which was only possible with palettized input)
with rgb565, comparably fast but working with any input format.

Decoding to rgb565 is now several-fold faster:
 (including overheads, underestimation of the actual decoding speedup)
 --------
 matrixbench_mpeg2.mpg (720x567) encoded with ffmpeg into Cinepak
 using default settings, decoding on an i5 3570K, 3.4 GHz:
 ...
 fast_bilinear:              ~65x realtime
 patch w/rgb565 override:    ~154x realtime
 --------
 https://ffmpeg.org/pipermail/ffmpeg-devel/2017-February/206799.html

The default output format is unchanged, rgb24.
---
 libavcodec/cinepak.c | 462 ++++++++++++++++++++++++++++-----------------------
 1 file changed, 252 insertions(+), 210 deletions(-)

diff --git a/libavcodec/cinepak.c b/libavcodec/cinepak.c
index d657e9c0c1..d8bc7860eb 100644
--- a/libavcodec/cinepak.c
+++ b/libavcodec/cinepak.c
@@ -29,8 +29,8 @@
  * @see For more information on the quirky data inside Sega FILM/CPK files, visit:
  *   http://wiki.multimedia.cx/index.php?title=Sega_FILM
  *
- * Cinepak colorspace support (c) 2013 Rl, Aetey Global Technologies AB
- * @author Cinepak colorspace, Rl, Aetey Global Technologies AB
+ * Cinepak colorspace/optimizations (c) 2013-2017 Rl, Aetey Global Technologies AB
+ * @author Cinepak colorspace/optimizations, Rl, Aetey Global Technologies AB
  */
 
 #include <stdio.h>
@@ -39,24 +39,29 @@
 
 #include "libavutil/common.h"
 #include "libavutil/intreadwrite.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
 #include "avcodec.h"
 #include "internal.h"
 
+#define MAX_STRIPS      32    /* an arbitrary limit -- rl */
 
-typedef uint8_t cvid_codebook[12];
-
-#define MAX_STRIPS      32
+typedef union cvid_codebook {
+    uint8_t    rgb24[256][12];
+    uint16_t  rgb565[256][ 4];
+} cvid_codebook;
 
 typedef struct cvid_strip {
     uint16_t          id;
     uint16_t          x1, y1;
     uint16_t          x2, y2;
-    cvid_codebook     v4_codebook[256];
-    cvid_codebook     v1_codebook[256];
+    cvid_codebook     v4_codebook;
+    cvid_codebook     v1_codebook;
 } cvid_strip;
 
-typedef struct CinepakContext {
-
+typedef struct CinepakContext CinepakContext;
+struct CinepakContext {
+    const AVClass *class;
     AVCodecContext *avctx;
     AVFrame *frame;
 
@@ -71,195 +76,233 @@ typedef struct CinepakContext {
     int sega_film_skip_bytes;
 
     uint32_t pal[256];
-} CinepakContext;
-
-static void cinepak_decode_codebook (cvid_codebook *codebook,
-                                     int chunk_id, int size, const uint8_t *data)
-{
-    const uint8_t *eod = (data + size);
-    uint32_t flag, mask;
-    int      i, n;
-    uint8_t *p;
-
-    /* check if this chunk contains 4- or 6-element vectors */
-    n    = (chunk_id & 0x04) ? 4 : 6;
-    flag = 0;
-    mask = 0;
-
-    p = codebook[0];
-    for (i=0; i < 256; i++) {
-        if ((chunk_id & 0x01) && !(mask >>= 1)) {
-            if ((data + 4) > eod)
-                break;
-
-            flag  = AV_RB32 (data);
-            data += 4;
-            mask  = 0x80000000;
-        }
+    void (*decode_codebook)(CinepakContext *s, cvid_codebook *codebook,
+                            int chunk_id, int size, const uint8_t *data);
+    int  (*decode_vectors)(CinepakContext *s, cvid_strip *strip,
+                           int chunk_id, int size, const uint8_t *data);
+/* options */
+    enum AVPixelFormat out_pixfmt;
+};
 
-        if (!(chunk_id & 0x01) || (flag & mask)) {
-            int k, kk;
+#define OFFSET(x) offsetof(CinepakContext, x)
+#define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
+static const AVOption options[] = {
+{"output_pixel_format", "set output pixel format as rgb24 or rgb565", OFFSET(out_pixfmt), AV_OPT_TYPE_PIXEL_FMT, {.i64=AV_PIX_FMT_NONE}, -1, INT_MAX, VD },
+    { NULL },
+};
 
-            if ((data + n) > eod)
-                break;
+static const AVClass cinepak_class = {
+    .class_name = "cinepak decoder",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
 
-            for (k = 0; k < 4; ++k) {
-                int r = *data++;
-                for (kk = 0; kk < 3; ++kk)
-                    *p++ = r;
-            }
-            if (n == 6) {
-                int r, g, b, u, v;
-                u = *(int8_t *)data++;
-                v = *(int8_t *)data++;
-                p -= 12;
+#define CODEBOOK_PROLOGUE(pixel_format) \
+static void cinepak_decode_codebook_##pixel_format (CinepakContext *s,\
+    cvid_codebook *codebook, int chunk_id, int size, const uint8_t *data) {\
+    const uint8_t *eod;\
+    uint32_t flag, mask;\
+    int selective_update, i;\
+
+#define DECODE_CODEBOOK(pixel_format) \
+CODEBOOK_PROLOGUE(pixel_format) \
+    int n, palette_video;\
+
+#define CODEBOOK_STREAM_PARSING \
+    for (i=0; i < 256; i++) {\
+        if (selective_update && !(mask >>= 1)) {\
+            if ((data + 4) > eod) break;\
+            flag = AV_RB32 (data); data += 4; mask = 0x80000000;\
+        }\
+        if (!selective_update || (flag & mask)) {\
+            int k;\
+            if ((data + n) > eod) break;\
+
+#define CODEBOOK_INTRO \
+    selective_update = (chunk_id & 0x01);\
+    eod = (data + size); flag = mask = 0;\
+
+#define CODEBOOK_FULL_COLOR \
+    /* check if this chunk contains 4- or 6-element vectors */\
+    n = (chunk_id & 0x04) ? 4 : 6;\
+    palette_video = s->palette_video;\
+    CODEBOOK_INTRO\
+    CODEBOOK_STREAM_PARSING\
+
+#define DECODE_VECTORS(pixel_format) \
+static int cinepak_decode_vectors_##pixel_format (CinepakContext *s, cvid_strip *strip, int chunk_id, int size, const uint8_t *data) {\
+    const uint8_t   *eod;\
+    uint32_t         flag, mask;\
+    int              x, y, selective_update, v1_only;\
+    char            *ip0, *ip1, *ip2, *ip3;\
+
+#define VECTOR_INTRO \
+    CODEBOOK_INTRO\
+    v1_only = (chunk_id & 0x02);\
+    for (y=strip->y1; y < strip->y2; y+=4) {\
+
+#define VECTOR_STREAM_PARSING \
+        for (x=strip->x1; x < strip->x2; x+=4) {\
+            if (selective_update && !(mask >>= 1)) {\
+                if ((data + 4) > eod) return AVERROR_INVALIDDATA;\
+                flag  = AV_RB32 (data); data += 4; mask = 0x80000000;\
+            }\
+            if (!selective_update || (flag & mask)) {\
+                if (!v1_only && !(mask >>= 1)) {\
+                    if ((data + 4) > eod) return AVERROR_INVALIDDATA;\
+                    flag  = AV_RB32 (data); data += 4; mask = 0x80000000;\
+                }\
+                if (v1_only || (~flag & mask)) {\
+                    POINTER_TYPE *p;\
+                    if (data >= eod) return AVERROR_INVALIDDATA;\
+
+#define VECTOR_DO \
+/* take care of y dimension not being multiple of 4 */\
+        if(s->avctx->height - y > 1) {\
+            ip1 = ip0 + s->frame->linesize[0];\
+            if(s->avctx->height - y > 2) {\
+                ip2 = ip1 + s->frame->linesize[0];\
+                if(s->avctx->height - y > 3) {\
+                    ip3 = ip2 + s->frame->linesize[0];\
+                }\
+            }\
+        }\
+/* to get the correct picture for not-multiple-of-4 cases let us fill each\
+ * block from the bottom up, thus possibly overwriting the bottommost line\
+ * more than once but ending with the correct data in place\
+ * (instead of in-loop checking) */\
+        VECTOR_STREAM_PARSING\
+
+DECODE_CODEBOOK(rgb24)
+    uint8_t *p = codebook->rgb24[0];
+    CODEBOOK_FULL_COLOR
+            if (n == 4)
+                if (palette_video)
+                    for (k = 0; k < 4; ++k) {
+                        uint32_t r = s->pal[*data++];
+                        *p++ = (r>>16)&0xff; *p++ = (r>>8)&0xff; *p++ = r&0xff;
+                    }
+                else
+                    for (k = 0; k < 4; ++k) {
+                        int kk, r = *data++;
+                        for (kk = 0; kk < 3; ++kk) *p++ = r;
+                    }
+            else { /* n == 6 */
+                int y, u, v, v2, u2v, u2;
+                u = (int8_t)data[4]; v = (int8_t)data[5];
+                v2 = v*2; u2v = u/2+v; u2 = u*2;
                 for(k=0; k<4; ++k) {
-                    r = *p++ + v*2;
-                    g = *p++ - (u/2) - v;
-                    b = *p   + u*2;
-                    p -= 2;
-                    *p++ = av_clip_uint8(r);
-                    *p++ = av_clip_uint8(g);
-                    *p++ = av_clip_uint8(b);
+                    y = *data++;
+                    *p++ = av_clip_uint8(y+v2);
+                    *p++ = av_clip_uint8(y-u2v);
+                    *p++ = av_clip_uint8(y+u2);
                 }
+                data += 2;
             }
-        } else {
+        } else
             p += 12;
-        }
     }
 }
-
-static int cinepak_decode_vectors (CinepakContext *s, cvid_strip *strip,
-                                   int chunk_id, int size, const uint8_t *data)
-{
-    const uint8_t   *eod = (data + size);
-    uint32_t         flag, mask;
-    uint8_t         *cb0, *cb1, *cb2, *cb3;
-    int             x, y;
-    char            *ip0, *ip1, *ip2, *ip3;
-
-    flag = 0;
-    mask = 0;
-
-    for (y=strip->y1; y < strip->y2; y+=4) {
-
-/* take care of y dimension not being multiple of 4, such streams exist */
+DECODE_VECTORS(rgb24)
+    uint8_t *cb0, *cb1, *cb2, *cb3;
+    VECTOR_INTRO
         ip0 = ip1 = ip2 = ip3 = s->frame->data[0] +
-          (s->palette_video?strip->x1:strip->x1*3) + (y * s->frame->linesize[0]);
-        if(s->avctx->height - y > 1) {
-            ip1 = ip0 + s->frame->linesize[0];
-            if(s->avctx->height - y > 2) {
-                ip2 = ip1 + s->frame->linesize[0];
-                if(s->avctx->height - y > 3) {
-                    ip3 = ip2 + s->frame->linesize[0];
+                                strip->x1*3 + y*s->frame->linesize[0];
+#define POINTER_TYPE uint8_t
+        VECTOR_DO
+#undef POINTER_TYPE
+                    p = strip->v1_codebook.rgb24[*data++] + 6;
+                    memcpy(ip3 + 0, p, 3); memcpy(ip3 + 3, p, 3);
+                    memcpy(ip2 + 0, p, 3); memcpy(ip2 + 3, p, 3);
+                    p += 3; /* ... + 9 */
+                    memcpy(ip3 + 6, p, 3); memcpy(ip3 + 9, p, 3);
+                    memcpy(ip2 + 6, p, 3); memcpy(ip2 + 9, p, 3);
+                    p -= 9; /* ... + 0 */
+                    memcpy(ip1 + 0, p, 3); memcpy(ip1 + 3, p, 3);
+                    memcpy(ip0 + 0, p, 3); memcpy(ip0 + 3, p, 3);
+                    p += 3; /* ... + 3 */
+                    memcpy(ip1 + 6, p, 3); memcpy(ip1 + 9, p, 3);
+                    memcpy(ip0 + 6, p, 3); memcpy(ip0 + 9, p, 3);
+                } else if (flag & mask) {
+                    if ((data + 4) > eod) return AVERROR_INVALIDDATA;
+                    cb0 = strip->v4_codebook.rgb24[*data++];
+                    cb1 = strip->v4_codebook.rgb24[*data++];
+                    cb2 = strip->v4_codebook.rgb24[*data++];
+                    cb3 = strip->v4_codebook.rgb24[*data++];
+                    memcpy(ip3 + 0, cb2 + 6, 6); memcpy(ip3 + 6, cb3 + 6, 6);
+                    memcpy(ip2 + 0, cb2 + 0, 6); memcpy(ip2 + 6, cb3 + 0, 6);
+                    memcpy(ip1 + 0, cb0 + 6, 6); memcpy(ip1 + 6, cb1 + 6, 6);
+                    memcpy(ip0 + 0, cb0 + 0, 6); memcpy(ip0 + 6, cb1 + 0, 6);
                 }
             }
+            ip0 += 12; ip1 += 12; ip2 += 12; ip3 += 12;
         }
-/* to get the correct picture for not-multiple-of-4 cases let us fill each
- * block from the bottom up, thus possibly overwriting the bottommost line
- * more than once but ending with the correct data in place
- * (instead of in-loop checking) */
-
-        for (x=strip->x1; x < strip->x2; x+=4) {
-            if ((chunk_id & 0x01) && !(mask >>= 1)) {
-                if ((data + 4) > eod)
-                    return AVERROR_INVALIDDATA;
-
-                flag  = AV_RB32 (data);
-                data += 4;
-                mask  = 0x80000000;
-            }
-
-            if (!(chunk_id & 0x01) || (flag & mask)) {
-                if (!(chunk_id & 0x02) && !(mask >>= 1)) {
-                    if ((data + 4) > eod)
-                        return AVERROR_INVALIDDATA;
-
-                    flag  = AV_RB32 (data);
-                    data += 4;
-                    mask  = 0x80000000;
-                }
-
-                if ((chunk_id & 0x02) || (~flag & mask)) {
-                    uint8_t *p;
-                    if (data >= eod)
-                        return AVERROR_INVALIDDATA;
-
-                    p = strip->v1_codebook[*data++];
-                    if (s->palette_video) {
-                        ip3[0] = ip3[1] = ip2[0] = ip2[1] = p[6];
-                        ip3[2] = ip3[3] = ip2[2] = ip2[3] = p[9];
-                        ip1[0] = ip1[1] = ip0[0] = ip0[1] = p[0];
-                        ip1[2] = ip1[3] = ip0[2] = ip0[3] = p[3];
-                    } else {
-                        p += 6;
-                        memcpy(ip3 + 0, p, 3); memcpy(ip3 + 3, p, 3);
-                        memcpy(ip2 + 0, p, 3); memcpy(ip2 + 3, p, 3);
-                        p += 3; /* ... + 9 */
-                        memcpy(ip3 + 6, p, 3); memcpy(ip3 + 9, p, 3);
-                        memcpy(ip2 + 6, p, 3); memcpy(ip2 + 9, p, 3);
-                        p -= 9; /* ... + 0 */
-                        memcpy(ip1 + 0, p, 3); memcpy(ip1 + 3, p, 3);
-                        memcpy(ip0 + 0, p, 3); memcpy(ip0 + 3, p, 3);
-                        p += 3; /* ... + 3 */
-                        memcpy(ip1 + 6, p, 3); memcpy(ip1 + 9, p, 3);
-                        memcpy(ip0 + 6, p, 3); memcpy(ip0 + 9, p, 3);
+    }
+    return 0;
+}
+#define PACK_RGB_RGB565(r,g,b) (((av_clip_uint8((r)+4)>>3)<<11)|((av_clip_uint8((g)+2)>>2)<<5)|(av_clip_uint8((b)+4)>>3)) /* rounding to nearest */
+DECODE_CODEBOOK(rgb565)
+    uint16_t *p = codebook->rgb565[0];
+    CODEBOOK_FULL_COLOR
+            if (n == 4)
+                if (palette_video)
+                    for (k = 0; k < 4; ++k) {
+                        uint32_t r = s->pal[*data++];
+                        *p++ = PACK_RGB_RGB565((r>>16)&0xff,(r>>8)&0xff,r&0xff);
                     }
-
+                else
+                    for (k = 0; k < 4; ++k) {
+                        int r = *data++;
+                        *p++ = PACK_RGB_RGB565(r,r,r);
+                    }
+            else { /* n == 6 */
+                int y, u, v, v2, u2v, u2;
+                u = (int8_t)data[4]; v = (int8_t)data[5];
+                v2 = v*2; u2v = u/2+v; u2 = u*2;
+                for(k=0; k<4; ++k) {
+                    y = *data++;
+                    *p++ = PACK_RGB_RGB565(y+v2,y-u2v,y+u2);
+                }
+                data += 2;
+            }
+        } else
+            p += 4;
+    }
+}
+DECODE_VECTORS(rgb565)
+    uint16_t *cb0, *cb1, *cb2, *cb3;
+    VECTOR_INTRO
+        ip0 = ip1 = ip2 = ip3 = s->frame->data[0] +
+                                strip->x1*2 + y*s->frame->linesize[0];
+#define POINTER_TYPE uint16_t
+        VECTOR_DO
+#undef POINTER_TYPE
+                    p = strip->v1_codebook.rgb565[*data++];
+                    * (uint16_t *)ip3    = *((uint16_t *)ip3+1) =
+                    * (uint16_t *)ip2    = *((uint16_t *)ip2+1) = p[2];
+                    *((uint16_t *)ip3+2) = *((uint16_t *)ip3+3) =
+                    *((uint16_t *)ip2+2) = *((uint16_t *)ip2+3) = p[3];
+                    * (uint16_t *)ip1    = *((uint16_t *)ip1+1) =
+                    * (uint16_t *)ip0    = *((uint16_t *)ip0+1) = p[0];
+                    *((uint16_t *)ip1+2) = *((uint16_t *)ip1+3) =
+                    *((uint16_t *)ip0+2) = *((uint16_t *)ip0+3) = p[1];
                 } else if (flag & mask) {
                     if ((data + 4) > eod)
                         return AVERROR_INVALIDDATA;
-
-                    cb0 = strip->v4_codebook[*data++];
-                    cb1 = strip->v4_codebook[*data++];
-                    cb2 = strip->v4_codebook[*data++];
-                    cb3 = strip->v4_codebook[*data++];
-                    if (s->palette_video) {
-                        uint8_t *p;
-                        p = ip3;
-                        *p++ = cb2[6];
-                        *p++ = cb2[9];
-                        *p++ = cb3[6];
-                        *p   = cb3[9];
-                        p = ip2;
-                        *p++ = cb2[0];
-                        *p++ = cb2[3];
-                        *p++ = cb3[0];
-                        *p   = cb3[3];
-                        p = ip1;
-                        *p++ = cb0[6];
-                        *p++ = cb0[9];
-                        *p++ = cb1[6];
-                        *p   = cb1[9];
-                        p = ip0;
-                        *p++ = cb0[0];
-                        *p++ = cb0[3];
-                        *p++ = cb1[0];
-                        *p   = cb1[3];
-                    } else {
-                        memcpy(ip3 + 0, cb2 + 6, 6);
-                        memcpy(ip3 + 6, cb3 + 6, 6);
-                        memcpy(ip2 + 0, cb2 + 0, 6);
-                        memcpy(ip2 + 6, cb3 + 0, 6);
-                        memcpy(ip1 + 0, cb0 + 6, 6);
-                        memcpy(ip1 + 6, cb1 + 6, 6);
-                        memcpy(ip0 + 0, cb0 + 0, 6);
-                        memcpy(ip0 + 6, cb1 + 0, 6);
-                    }
-
+                    cb0 = strip->v4_codebook.rgb565[*data++];
+                    cb1 = strip->v4_codebook.rgb565[*data++];
+                    cb2 = strip->v4_codebook.rgb565[*data++];
+                    cb3 = strip->v4_codebook.rgb565[*data++];
+                    memcpy(ip3 + 0, cb2 + 2, 4); memcpy(ip3 + 4, cb3 + 2, 4);
+                    memcpy(ip2 + 0, cb2 + 0, 4); memcpy(ip2 + 4, cb3 + 0, 4);
+                    memcpy(ip1 + 0, cb0 + 2, 4); memcpy(ip1 + 4, cb1 + 2, 4);
+                    memcpy(ip0 + 0, cb0 + 0, 4); memcpy(ip0 + 4, cb1 + 0, 4);
                 }
             }
-
-            if (s->palette_video) {
-                ip0 += 4;  ip1 += 4;
-                ip2 += 4;  ip3 += 4;
-            } else {
-                ip0 += 12;  ip1 += 12;
-                ip2 += 12;  ip3 += 12;
-            }
+            ip0 += 8; ip1 += 8; ip2 += 8; ip3 += 8;
         }
     }
-
     return 0;
 }
 
@@ -286,29 +329,15 @@ static int cinepak_decode_strip (CinepakContext *s,
 
         switch (chunk_id) {
 
-        case 0x20:
-        case 0x21:
-        case 0x24:
-        case 0x25:
-            cinepak_decode_codebook (strip->v4_codebook, chunk_id,
-                chunk_size, data);
+        case 0x20: case 0x21: case 0x24: case 0x25:
+            s->decode_codebook(s, &strip->v4_codebook, chunk_id, chunk_size, data);
             break;
-
-        case 0x22:
-        case 0x23:
-        case 0x26:
-        case 0x27:
-            cinepak_decode_codebook (strip->v1_codebook, chunk_id,
-                chunk_size, data);
+        case 0x22: case 0x23: case 0x26: case 0x27:
+            s->decode_codebook (s, &strip->v1_codebook, chunk_id, chunk_size, data);
             break;
-
-        case 0x30:
-        case 0x31:
-        case 0x32:
-            return cinepak_decode_vectors (s, strip, chunk_id,
-                chunk_size, data);
+        case 0x30: case 0x31: case 0x32:
+            return s->decode_vectors (s, strip, chunk_id, chunk_size, data);
         }
-
         data += chunk_size;
     }
 
@@ -385,9 +414,9 @@ static int cinepak_decode (CinepakContext *s)
         strip_size = ((s->data + strip_size) > eod) ? (eod - s->data) : strip_size;
 
         if ((i > 0) && !(frame_flags & 0x01)) {
-            memcpy (s->strips[i].v4_codebook, s->strips[i-1].v4_codebook,
+            memcpy (&s->strips[i].v4_codebook, &s->strips[i-1].v4_codebook,
                 sizeof(s->strips[i].v4_codebook));
-            memcpy (s->strips[i].v1_codebook, s->strips[i-1].v1_codebook,
+            memcpy (&s->strips[i].v1_codebook, &s->strips[i-1].v1_codebook,
                 sizeof(s->strips[i].v1_codebook));
         }
 
@@ -402,6 +431,10 @@ static int cinepak_decode (CinepakContext *s)
     return 0;
 }
 
+static const enum AVPixelFormat pixfmt_list[] = {
+    AV_PIX_FMT_RGB24, AV_PIX_FMT_RGB565, AV_PIX_FMT_NONE
+};
+
 static av_cold int cinepak_decode_init(AVCodecContext *avctx)
 {
     CinepakContext *s = avctx->priv_data;
@@ -412,15 +445,26 @@ static av_cold int cinepak_decode_init(AVCodecContext *avctx)
 
     s->sega_film_skip_bytes = -1;  /* uninitialized state */
 
-    // check for paletted data
-    if (avctx->bits_per_coded_sample != 8) {
-        s->palette_video = 0;
-        avctx->pix_fmt = AV_PIX_FMT_RGB24;
-    } else {
-        s->palette_video = 1;
-        avctx->pix_fmt = AV_PIX_FMT_PAL8;
+    /* check for paletted data */
+    s->palette_video = (avctx->bits_per_coded_sample == 8);
+
+    if (s->out_pixfmt != AV_PIX_FMT_NONE) /* the option is set to something */
+        avctx->pix_fmt = s->out_pixfmt;
+    else
+        avctx->pix_fmt = ff_get_format(avctx, pixfmt_list);
+
+#define DECODE_TO(pixel_format) \
+ s->decode_codebook = cinepak_decode_codebook_##pixel_format;\
+ s->decode_vectors  = cinepak_decode_vectors_##pixel_format;\
+ break;\
+
+    switch (avctx->pix_fmt) {
+    case AV_PIX_FMT_RGB24:   DECODE_TO(rgb24)
+    case AV_PIX_FMT_RGB565:  DECODE_TO(rgb565)
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Unsupported pixel format %s\n", av_get_pix_fmt_name(avctx->pix_fmt));
+        return AVERROR(EINVAL);
     }
-
     s->frame = av_frame_alloc();
     if (!s->frame)
         return AVERROR(ENOMEM);
@@ -457,9 +501,6 @@ static int cinepak_decode_frame(AVCodecContext *avctx,
         av_log(avctx, AV_LOG_ERROR, "cinepak_decode failed\n");
     }
 
-    if (s->palette_video)
-        memcpy (s->frame->data[1], s->pal, AVPALETTE_SIZE);
-
     if ((ret = av_frame_ref(data, s->frame)) < 0)
         return ret;
 
@@ -488,4 +529,5 @@ AVCodec ff_cinepak_decoder = {
     .close          = cinepak_decode_end,
     .decode         = cinepak_decode_frame,
     .capabilities   = AV_CODEC_CAP_DR1,
+    .priv_class     = &cinepak_class,
 };
-- 
2.11.0



More information about the ffmpeg-devel mailing list