[FFmpeg-devel] [PATCH/WIP] lavfi: hqx filters.

Sun Jun 8 23:15:17 CEST 2014

---
Only hq2x for now.

Original code for hq2x is about 2800 lines of unrolled code. Current filter is
about 300 lines of code. The code in hq2x_interp() is generated from
https://github.com/ubitux/hqx ("make code" - see also "make show"), which is
done by parsing, analyzing and cutting down hq2x.c and friends.

The same needs to be done for hq3x and hq4x. I don't know how long it will take.
Help welcome for those interested.

See also Ticket #3404.
---
 libavfilter/Makefile        |   1 +
 libavfilter/allfilters.c    |   1 +
 libavfilter/vf_hqx.c        | 306 ++++++++++++++++++++++++++++++++++++++++++++
 tests/fate/filter-video.mak |   7 +
 tests/ref/fate/filter-hq2x  |   6 +
 tests/ref/fate/filter-hq3x  |   6 +
 tests/ref/fate/filter-hq4x  |   6 +
 7 files changed, 333 insertions(+)
 create mode 100644 libavfilter/vf_hqx.c
 create mode 100644 tests/ref/fate/filter-hq2x
 create mode 100644 tests/ref/fate/filter-hq3x
 create mode 100644 tests/ref/fate/filter-hq4x

diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 8ba0312..ea9815d 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -130,6 +130,7 @@ OBJS-$(CONFIG_HFLIP_FILTER)                  += vf_hflip.o
 OBJS-$(CONFIG_HISTEQ_FILTER)                 += vf_histeq.o
 OBJS-$(CONFIG_HISTOGRAM_FILTER)              += vf_histogram.o
 OBJS-$(CONFIG_HQDN3D_FILTER)                 += vf_hqdn3d.o
+OBJS-$(CONFIG_HQX_FILTER)                    += vf_hqx.o
 OBJS-$(CONFIG_HUE_FILTER)                    += vf_hue.o
 OBJS-$(CONFIG_IDET_FILTER)                   += vf_idet.o
 OBJS-$(CONFIG_IL_FILTER)                     += vf_il.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 55d505a..7e1fd1d 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -148,6 +148,7 @@ void avfilter_register_all(void)
     REGISTER_FILTER(HISTEQ,         histeq,         vf);
     REGISTER_FILTER(HISTOGRAM,      histogram,      vf);
     REGISTER_FILTER(HQDN3D,         hqdn3d,         vf);
+    REGISTER_FILTER(HQX,            hqx,            vf);
     REGISTER_FILTER(HUE,            hue,            vf);
     REGISTER_FILTER(IDET,           idet,           vf);
     REGISTER_FILTER(IL,             il,             vf);
diff --git a/libavfilter/vf_hqx.c b/libavfilter/vf_hqx.c
new file mode 100644
index 0000000..4b83b07
--- /dev/null
+++ b/libavfilter/vf_hqx.c
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2014 Clément Bœsch
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/opt.h"
+#include "libavutil/avassert.h"
+#include "libavutil/pixdesc.h"
+#include "internal.h"
+
+typedef void (*hqxfunc_t)(const uint32_t *r2y,                  /* scaler entry point; r2y is the RGB->YUV lookup table */
+                          uint8_t       *dst, int dst_linesize, /* output plane and its line stride */
+                          const uint8_t *src, int src_linesize, /* input plane and its line stride */
+                          int w, int h);                        /* filter_frame() passes the input link dimensions */
+
+typedef struct {
+    const AVClass *class;
+    int n;                      /* "n" option: scale factor, range 2..4 (see hqx_options) */
+    hqxfunc_t func;             /* scaler picked in init() from n */
+    uint32_t rgbtoyuv[1<<24];   /* packed (y<<16)+(u<<8)+v for every 24-bit RGB value, filled in init() */
+} HQXContext;
+
+#define OFFSET(x) offsetof(HQXContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+static const AVOption hqx_options[] = { /* user-settable options of the hqx filter */
+    { "n", "set scale factor", OFFSET(n), AV_OPT_TYPE_INT, {.i64 = 4}, 2, 4, .flags = FLAGS }, /* NOTE(review): default is 4, but hqx_filter() only implements n == 2 and asserts for 3 and 4 */
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(hqx);
+
+static av_always_inline uint32_t rgb2yuv(const uint32_t *r2y, uint32_t c)
+{
+    return r2y[c & 0xffffff]; /* table lookup by the low 24 bits of the pixel; the top byte is ignored */
+}
+
+/* Non-zero when two packed 0xYYUUVV values differ by more than the per-channel thresholds. */
+static av_always_inline int yuv_diff(uint32_t yuv1, uint32_t yuv2)
+{
+#define YMASK   0xff0000
+#define UMASK   0x00ff00
+#define VMASK   0x0000ff
+#define TRESH_Y 0x300000
+#define TRESH_U 0x000700
+#define TRESH_V 0x000006
+#define YUV_CDIFF(m) abs((int)(yuv1 & (m)) - (int)(yuv2 & (m))) /* signed before abs(), not a wrapped unsigned */
+    return YUV_CDIFF(YMASK) > TRESH_Y || YUV_CDIFF(UMASK) > TRESH_U || YUV_CDIFF(VMASK) > TRESH_V;
+}
+
+static av_always_inline uint32_t interp_2px(uint32_t c1, int w1, uint32_t c2, int w2, int s)
+{
+/* one 8-bit lane at bit offset sh: (c1*w1 + c2*w2) >> s, truncated to 8 bits and put back in place */
+#define MIX2(sh) ((((((c1 >> (sh)) & 0xff) * w1 + ((c2 >> (sh)) & 0xff) * w2) >> s) & 0xff) << (sh))
+    return MIX2(24) | MIX2(16) | MIX2(8) | MIX2(0);
+#undef MIX2
+}
+
+static av_always_inline uint32_t interp_3px(uint32_t c1, int w1, uint32_t c2, int w2, uint32_t c3, int w3, int s)
+{
+/* one 8-bit lane at bit offset sh: (c1*w1 + c2*w2 + c3*w3) >> s, truncated to 8 bits and put back in place */
+#define MIX3(sh) ((((((c1 >> (sh)) & 0xff) * w1 + ((c2 >> (sh)) & 0xff) * w2 + ((c3 >> (sh)) & 0xff) * w3) >> s) & 0xff) << (sh))
+    return MIX3(24) | MIX3(16) | MIX3(8) | MIX3(0);
+#undef MIX3
+}
+
+static av_always_inline uint32_t hq2x_interp(const uint32_t *r2y, int k, /* k: neighbour-difference bit pattern */
+                                             const uint32_t *w,          /* w: the 3x3 pixel window, row by row */
+                                             int p0, int p1, int p2,
+                                             int p3, int p4, int p5,
+                                             int p6, int p7, int p8)     /* p0..p8: indices into w, permuted per output pixel */
+{
+/* Compute one output pixel of the 2x block: the first rule below that matches
+ * picks the blend. P(m, r) is true when the neighbour-difference bits selected
+ * by mask m equal r (bit = 1: that neighbour differs from the center pixel). */
+#define P(m, r) ((k_transposed & (m)) == (r))
+/* adjust 012345678 to 01235678: the mask doesn't contain the (null) diff
+ * between the center/current pixel and itself */
+#define DROP4(z) ((z) > 4 ? (z)-1 : (z))
+/* TRP(x, v, n): take bit 7-DROP4(v) of x and move it to bit n */
+#define TRP(x, v, n) (((x) >> (7-(DROP4(v))) & 1) << (n))
+
+#define WDIFF(c1, c2) yuv_diff(rgb2yuv(r2y, c1), rgb2yuv(r2y, c2)) /* do two pixels differ beyond the YUV thresholds? */
+
+    const int k_transposed = TRP(k,p0,7) | TRP(k,p1,6) | TRP(k,p2,5)
+                           | TRP(k,p3,4) |      0      | TRP(k,p5,3)
+                           | TRP(k,p6,2) | TRP(k,p7,1) | TRP(k,p8,0);
+
+    const uint32_t w0 = w[p0], w1 = w[p1],
+                   w3 = w[p3], w4 = w[p4], w5 = w[p5],
+                               w7 = w[p7];
+
+    if ((P(0xbf,0x37) || P(0xdb,0x13)) && WDIFF(w1, w5))
+        return interp_2px(w4, 3, w3, 1, 2);
+    if ((P(0xef,0x6d) || P(0xdb,0x49)) && WDIFF(w7, w3))
+        return interp_2px(w4, 3, w1, 1, 2);
+    if ((P(0x0b,0x0b) || P(0xfe,0x1a) || P(0xfe,0x4a)) && WDIFF(w3, w1))
+        return w4;
+    if ((P(0xeb,0x8a) || P(0xaf,0x8a) || P(0x7f,0x5a) || P(0xcf,0x8a) || P(0x6f,0x2a) || P(0xbb,0x8a) || P(0x3f,0x0e) || P(0x5b,0x0a) || P(0x9f,0x8a) || P(0xfb,0x5a) || P(0xef,0x4e) || P(0xdf,0x5a) || P(0xbf,0x3a)) && WDIFF(w3, w1))
+        return interp_2px(w4, 3, w0, 1, 2);
+    if (P(0x0b,0x08))
+        return interp_3px(w4, 2, w0, 1, w1, 1, 2);
+    if (P(0x0b,0x02))
+        return interp_3px(w4, 2, w0, 1, w3, 1, 2);
+    if (P(0x2f,0x2f))
+        return interp_3px(w4, 14, w3, 1, w1, 1, 4);
+    if (P(0xbf,0x37) || P(0xdb,0x13))
+        return interp_3px(w4, 5, w1, 2, w3, 1, 3);
+    if (P(0xef,0x6d) || P(0xdb,0x49))
+        return interp_3px(w4, 5, w3, 2, w1, 1, 3);
+    if (P(0x8b,0x83) || P(0x6b,0x43) || P(0x4f,0x43) || P(0x1b,0x03))
+        return interp_2px(w4, 3, w3, 1, 2);
+    if (P(0x8b,0x89) || P(0x3b,0x19) || P(0x4b,0x09) || P(0x1f,0x19))
+        return interp_2px(w4, 3, w1, 1, 2);
+    if (P(0xef,0xab) || P(0xbf,0x8f) || P(0x7e,0x0e) || P(0x7e,0x2a))
+        return interp_3px(w4, 2, w3, 3, w1, 3, 3);
+    if (P(0xdf,0xde) || P(0xfb,0xfa) || P(0x3f,0x3e) || P(0x6f,0x6e) || P(0xdf,0x1e) || P(0xfb,0x6a))
+        return interp_2px(w4, 3, w0, 1, 2);
+    if (P(0x0a,0x00) || P(0x3b,0x1b) || P(0x4f,0x4b) || P(0x2f,0x0b) || P(0x7e,0x0a) || P(0xeb,0x4b) || P(0x9f,0x1b) || P(0xee,0x0a) || P(0xbe,0x0a))
+        return interp_3px(w4, 2, w3, 1, w1, 1, 2);
+    return interp_3px(w4, 6, w3, 1, w1, 1, 3); /* fallback when no rule above matched */
+}
+
+/* Scale a packed 32-bit image by n: each source pixel yields an n x n output block.
+ * r2y is the RGB->YUV lookup table; linesizes are in bytes and may be negative.
+ * Only n == 2 is implemented; 3 and 4 currently hit av_assert0(0). */
+static av_always_inline void hqx_filter(const uint32_t *r2y,
+                                        uint8_t       *dst, int dst_linesize,
+                                        const uint8_t *src, int src_linesize,
+                                        int width, int height, int n)
+{
+    int i, j, k;
+    /* kept signed: an unsigned linesize turns a negative stride into a huge forward jump */
+    const int dst32_linesize = dst_linesize / 4;
+    const int src32_linesize = src_linesize / 4;
+
+    for (j = 0; j < height; j++) {
+        const uint32_t *src32 = (const uint32_t *)src;
+              uint32_t *dst32 = (uint32_t *)dst;
+        /* on the first/last row (and column, below) the missing neighbour is the current one */
+        const int prevline = j > 0          ? -src32_linesize : 0;
+        const int nextline = j < height - 1 ?  src32_linesize : 0;
+
+        for (i = 0; i < width; i++) {
+            const int prevcol = i > 0        ? -1 : 0;
+            const int nextcol = i < width -1 ?  1 : 0;
+            int pattern = 0, flag = 1;
+            const uint32_t w[3*3] = {
+                src32[prevcol + prevline], src32[prevline], src32[prevline + nextcol],
+                src32[prevcol           ], src32[       0], src32[           nextcol],
+                src32[prevcol + nextline], src32[nextline], src32[nextline + nextcol]
+            };
+            const uint32_t yuv1 = rgb2yuv(r2y, w[4]);
+
+            /* one bit per neighbour (center skipped), set when it differs from the center */
+            for (k = 0; k < FF_ARRAY_ELEMS(w); k++) {
+                if (k == 4)
+                    continue;
+                if (w[k] != w[4] && yuv_diff(yuv1, rgb2yuv(r2y, w[k])))
+                    pattern |= flag;
+                flag <<= 1;
+            }
+
+            if (n == 2) {
+                dst32[               0] = hq2x_interp(r2y, pattern, w, 0,1,2,3,4,5,6,7,8); // 00
+                dst32[               1] = hq2x_interp(r2y, pattern, w, 2,5,8,1,4,7,0,3,6); // 01
+                dst32[dst32_linesize+0] = hq2x_interp(r2y, pattern, w, 6,3,0,7,4,1,8,5,2); // 10
+                dst32[dst32_linesize+1] = hq2x_interp(r2y, pattern, w, 8,7,6,5,4,3,2,1,0); // 11
+            } else if (n == 3) {
+                // TODO
+                av_assert0(0);
+            } else if (n == 4) {
+                // TODO
+                av_assert0(0);
+            } else {
+                av_assert0(0);
+            }
+
+            src32 += 1;
+            dst32 += n;
+        }
+
+        src += src_linesize;
+        dst += dst_linesize * n;
+    }
+}
+
+#define HQX_FUNC(size) \
+static void hq##size##x(const uint32_t *r2y, \
+                        uint8_t       *dst, int dst_linesize, \
+                        const uint8_t *src, int src_linesize, \
+                        int w, int h) \
+{ \
+    hqx_filter(r2y, dst, dst_linesize, src, src_linesize, w, h, size); \
+}
+
+HQX_FUNC(2) /* defines hq2x(): an hqxfunc_t wrapper calling hqx_filter() with n = 2 */
+HQX_FUNC(3) /* defines hq3x(): same with n = 3 */
+HQX_FUNC(4) /* defines hq4x(): same with n = 4 */
+
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pix_fmts[] = {AV_PIX_FMT_BGRA, AV_PIX_FMT_NONE}; /* BGRA is the only listed format */
+    ff_set_common_formats(ctx, ff_make_format_list(pix_fmts)); /* NOTE(review): the list is passed on without a NULL check — confirm allocation failure is handled */
+    return 0;
+}
+
+/* Configure the output link: input dimensions multiplied by the scale factor n. */
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext  *ctx    = outlink->src;
+    AVFilterLink     *inlink = ctx->inputs[0];
+    const HQXContext *hqx    = ctx->priv;
+
+    outlink->h = hqx->n * inlink->h;
+    outlink->w = hqx->n * inlink->w;
+    av_log(inlink->dst, AV_LOG_VERBOSE, "fmt:%s size:%dx%d -> size:%dx%d\n",
+           av_get_pix_fmt_name(inlink->format), inlink->w, inlink->h, outlink->w, outlink->h);
+    return 0;
+}
+
+/* Scale one input frame into a newly allocated output frame and pass it downstream. */
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx     = inlink->dst;
+    HQXContext      *hqx     = ctx->priv;
+    AVFilterLink    *outlink = ctx->outputs[0];
+    AVFrame         *out     = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+
+    if (!out) {
+        av_frame_free(&in);
+        return AVERROR(ENOMEM);
+    }
+    av_frame_copy_props(out, in);
+    out->height = outlink->h;
+    out->width  = outlink->w;
+
+    hqx->func(hqx->rgbtoyuv, out->data[0], out->linesize[0],
+              in->data[0], in->linesize[0], inlink->w, inlink->h);
+    /* the input frame is released before the output is handed to the next filter */
+    av_frame_free(&in);
+    return ff_filter_frame(outlink, out);
+}
+
+/* Build the RGB->YUV lookup table and select the scaler matching the "n" option. */
+static av_cold int init(AVFilterContext *ctx)
+{
+    HQXContext *hqx = ctx->priv;
+    static const hqxfunc_t hqxfuncs[] = {hq2x, hq3x, hq4x};
+    uint32_t c;
+
+    for (c = 0; c < FF_ARRAY_ELEMS(hqx->rgbtoyuv); c++) { /* '<': index FF_ARRAY_ELEMS() is past the end */
+        const int r = c >> 16 & 0xff, g = c >> 8 & 0xff, b = c & 0xff;
+        /* truncate via int: converting a negative double straight to unsigned is undefined */
+        const uint32_t y = (int)( 0.299*r + 0.587*g + 0.114*b);
+        const uint32_t u = (int)(-0.169*r - 0.331*g +   0.5*b) + 128;
+        const uint32_t v = (int)(   0.5*r - 0.419*g - 0.081*b) + 128;
+        hqx->rgbtoyuv[c] = (y << 16) + (u << 8) + v;
+    }
+
+    hqx->func = hqxfuncs[hqx->n - 2];
+    return 0;
+}
+
+static const AVFilterPad hqx_inputs[] = { /* one video input pad; its frames go to filter_frame() */
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+    },
+    { NULL } /* sentinel entry (NULL name) closing the array */
+};
+
+static const AVFilterPad hqx_outputs[] = { /* one video output pad; config_output() sets its scaled size */
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_output,
+    },
+    { NULL } /* sentinel entry (NULL name) closing the array */
+};
+
+AVFilter ff_vf_hqx = { /* filter definition; registered via REGISTER_FILTER(HQX, hqx, vf) in allfilters.c */
+    .name          = "hqx",
+    .description   = NULL_IF_CONFIG_SMALL("Scale the input by 2, 3 or 4 using the hq*x magnification algorithm."),
+    .priv_size     = sizeof(HQXContext), /* includes the 1<<24-entry rgbtoyuv table */
+    .init          = init,
+    .query_formats = query_formats,
+    .inputs        = hqx_inputs,
+    .outputs       = hqx_outputs,
+    .priv_class    = &hqx_class,
+};
diff --git a/tests/fate/filter-video.mak b/tests/fate/filter-video.mak
index 08349ce..0624444 100644
--- a/tests/fate/filter-video.mak
+++ b/tests/fate/filter-video.mak
@@ -140,6 +140,13 @@ FATE_FILTER-$(call ALLYES, SMJPEG_DEMUXER MJPEG_DECODER PERMS_FILTER HQDN3D_FILT
 fate-filter-hqdn3d-sample: tests/data/filtergraphs/hqdn3d
 fate-filter-hqdn3d-sample: CMD = framecrc -idct simple -i $(TARGET_SAMPLES)/smjpeg/scenwin.mjpg -filter_complex_script $(TARGET_PATH)/tests/data/filtergraphs/hqdn3d -an
 
+FATE_FILTER_HQX-$(call ALLYES, IMAGE2_DEMUXER PNG_DECODER HQX_FILTER) = fate-filter-hq4x fate-filter-hq3x fate-filter-hq2x
+FATE_FILTER-yes += $(FATE_FILTER_HQX-yes)
+fate-filter-hq4x: CMD = framemd5 -i $(TARGET_SAMPLES)/filter/hqx.png -vf hqx=2 # NOTE(review): runs hqx=2, not 4; the hq4x reference equals the hq2x one
+fate-filter-hq3x: CMD = framemd5 -i $(TARGET_SAMPLES)/filter/hqx.png -vf hqx=2 # NOTE(review): runs hqx=2, not 3; the hq3x reference equals the hq2x one
+fate-filter-hq2x: CMD = framemd5 -i $(TARGET_SAMPLES)/filter/hqx.png -vf hqx=2
+fate-filter-hqx: $(FATE_FILTER_HQX-yes)
+
 FATE_FILTER-$(call ALLYES, UTVIDEO_DECODER AVI_DEMUXER PERMS_FILTER CURVES_FILTER) += fate-filter-curves
 fate-filter-curves: CMD = framecrc -i $(TARGET_SAMPLES)/utvideo/utvideo_rgb_median.avi -vf perms=random,curves=vintage
 
diff --git a/tests/ref/fate/filter-hq2x b/tests/ref/fate/filter-hq2x
new file mode 100644
index 0000000..92391b6
--- /dev/null
+++ b/tests/ref/fate/filter-hq2x
@@ -0,0 +1,6 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/25
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,        1,   131072, 66f8f4e12c37534c2c6dbf715203290d
diff --git a/tests/ref/fate/filter-hq3x b/tests/ref/fate/filter-hq3x
new file mode 100644
index 0000000..92391b6
--- /dev/null
+++ b/tests/ref/fate/filter-hq3x
@@ -0,0 +1,6 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/25
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,        1,   131072, 66f8f4e12c37534c2c6dbf715203290d
diff --git a/tests/ref/fate/filter-hq4x b/tests/ref/fate/filter-hq4x
new file mode 100644
index 0000000..92391b6
--- /dev/null
+++ b/tests/ref/fate/filter-hq4x
@@ -0,0 +1,6 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/25
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,        1,   131072, 66f8f4e12c37534c2c6dbf715203290d
-- 
2.0.0