[FFmpeg-devel] [PATCH 3/3] lavfi: add a Vulkan avgblur filter

Rostislav Pehlivanov atomnuker at gmail.com
Fri Mar 30 06:14:34 EEST 2018


This commit adds an average blur Vulkan filter which functions
exactly the same as avgblur but on Vulkan surfaces.

Currently contains a workaround that will be removed for the actual,
non-RFC version.

It implements a clever way of minimizing texel fetches by storing
all texels needed to filter an entire wavefront's worth of
workgroups in a shared cache, and then averaging over the area needed.

Currently, it does not special-case the edges of images, so zeros are
mixed into the average near the borders of each plane. This will be
fixed for the non-RFC version.

Signed-off-by: Rostislav Pehlivanov <atomnuker at gmail.com>
---
 configure                       |   1 +
 libavfilter/Makefile            |   1 +
 libavfilter/allfilters.c        |   1 +
 libavfilter/vf_avgblur_vulkan.c | 353 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 356 insertions(+)
 create mode 100644 libavfilter/vf_avgblur_vulkan.c

diff --git a/configure b/configure
index 3621b5cdeb..388e45fed1 100755
--- a/configure
+++ b/configure
@@ -3293,6 +3293,7 @@ ass_filter_deps="libass"
 atempo_filter_deps="avcodec"
 atempo_filter_select="rdft"
 avgblur_opencl_filter_deps="opencl"
+avgblur_vulkan_filter_deps="vulkan libshaderc"
 azmq_filter_deps="libzmq"
 blackframe_filter_deps="gpl"
 boxblur_filter_deps="gpl"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index a90ca30ad7..f0a47320c8 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -141,6 +141,7 @@ OBJS-$(CONFIG_ATADENOISE_FILTER)             += vf_atadenoise.o
 OBJS-$(CONFIG_AVGBLUR_FILTER)                += vf_avgblur.o
 OBJS-$(CONFIG_AVGBLUR_OPENCL_FILTER)         += vf_avgblur_opencl.o opencl.o \
                                                 opencl/avgblur.o
+OBJS-$(CONFIG_AVGBLUR_VULKAN_FILTER)         += vf_avgblur_vulkan.o vulkan.o
 OBJS-$(CONFIG_BBOX_FILTER)                   += bbox.o vf_bbox.o
 OBJS-$(CONFIG_BENCH_FILTER)                  += f_bench.o
 OBJS-$(CONFIG_BITPLANENOISE_FILTER)          += vf_bitplanenoise.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 1cf13409ca..3cbaecd726 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -150,6 +150,7 @@ static void register_all(void)
     REGISTER_FILTER(ATADENOISE,     atadenoise,     vf);
     REGISTER_FILTER(AVGBLUR,        avgblur,        vf);
     REGISTER_FILTER(AVGBLUR_OPENCL, avgblur_opencl, vf);
+    REGISTER_FILTER(AVGBLUR_VULKAN, avgblur_vulkan, vf);
     REGISTER_FILTER(BBOX,           bbox,           vf);
     REGISTER_FILTER(BENCH,          bench,          vf);
     REGISTER_FILTER(BITPLANENOISE,  bitplanenoise,  vf);
diff --git a/libavfilter/vf_avgblur_vulkan.c b/libavfilter/vf_avgblur_vulkan.c
new file mode 100644
index 0000000000..a2c0fddd98
--- /dev/null
+++ b/libavfilter/vf_avgblur_vulkan.c
@@ -0,0 +1,353 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/opt.h"
+#include "vulkan.h"
+#include "internal.h"
+
+typedef struct AvgBlurVulkanContext { /* private context of the avgblur_vulkan filter */
+    VulkanFilterContext vkctx;
+
+    int initialized;       /* set to 1 at the end of init_filter(), cleared in uninit */
+    AVVkExecContext exec;  /* provides the command buffer, queue and fence used per frame */
+    AVVkBuffer shader_buf; /* freed in uninit; NOTE(review): not otherwise referenced in this file */
+
+    /* Shader updaters, must be in the main filter struct */
+    VkDescriptorImageInfo input_images[3];  /* per-plane image views of the input frame */
+    VkDescriptorImageInfo output_images[3]; /* per-plane image views of the output frame */
+
+    int size_x; /* "sizeX" option: horizontal radius */
+    int size_y; /* "sizeY" option: vertical radius */
+    int planes; /* "planes" option: bitmask, bit i set means plane i is blurred, else copied */
+} AvgBlurVulkanContext;
+
+#define RET(x) \
+    do { \
+        if ((err = (x)) < 0) \
+            goto fail; \
+    } while (0) /* needs a local 'int err' and a 'fail:' label at the call site */
+
+static const char blur_kernel[] = { /* GLSL text of blur_kernel(); added to the shader by init_filter() */
+    C(0, #define CACHE_SIZE (ivec2(gl_WorkGroupSize) + FILTER_RADIUS*2)        )
+    C(0, shared vec4 cache[AREA(CACHE_SIZE)];                                  ) /* workgroup tile plus a radius-wide border per side (assuming AREA() is x*y) */
+    C(0,                                                                       )
+    C(0, void blur_kernel(int idx, ivec2 pos)                                  )
+    C(0, {                                                                     )
+    C(1,     ivec2 d;                                                          )
+    C(1,     const ivec2 s = CACHE_SIZE;                                       )
+    C(1,     const ivec2 w = ivec2(gl_WorkGroupSize);                          )
+    C(1,     const ivec2 l = ivec2(gl_LocalInvocationID.xy);                   )
+    C(1,                                                                       )
+    C(1,     for (d.y = l.y; d.y < s.y; d.y += w.y) {                          ) /* each invocation fills the entries at its local id plus multiples of the workgroup size */
+    C(2,         for (d.x = l.x; d.x < s.x; d.x += w.x) {                      )
+    C(3,             const ivec2 np = pos + d - l - FILTER_RADIUS;             ) /* image coordinate that cache entry d holds */
+    C(3,             cache[d.y*s.x + d.x] = imageLoad(input_img[idx], np);     )
+    C(2,         }                                                             )
+    C(1,     }                                                                 )
+    C(0,                                                                       )
+    C(1,     barrier();                                                        ) /* the cache must be completely written before any window is read */
+    C(0,                                                                       )
+    C(1,     vec4 avg = vec4(0.0f);                                            )
+    C(1,     ivec2 start = ivec2(0);                                           )
+    C(1,     ivec2 end = FILTER_RADIUS*2 + 1;                                  )
+    C(1,     for (d.y = start.y; d.y < end.y; d.y++)                           ) /* sum a window of 2*radius+1 texels per axis, starting at this invocation's cache slot */
+    C(2,         for (d.x = start.x; d.x < end.x; d.x++)                       )
+    C(3,              avg += cache[(l.y + d.y)*s.x + l.x + d.x];               )
+    C(0,                                                                       )
+    C(1,     avg /= AREA(end - start);                                         ) /* sum -> mean */
+    C(1,     imageStore(output_img[idx], pos, avg);                            )
+    C(0, }                                                                     )
+};
+
+static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
+{ /* Lazy one-time setup, called from the filter_frame callback; 'in' is not used. Returns 0 or <0. */
+    int err;
+    AvgBlurVulkanContext *s = ctx->priv;
+
+    /* Create sampler. NOTE(review): the result of this call is not checked */
+    ff_vk_init_sampler(ctx, NULL);
+
+    { /* Create the shader */
+        const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+        /* NOTE(review): shd is used without a NULL check - confirm ff_vk_init_shader() cannot fail */
+        SPIRVShader *shd = ff_vk_init_shader(ctx, "avgblur_compute",
+                                             VK_SHADER_STAGE_COMPUTE_BIT);
+        ff_vk_set_compute_shader_sizes(ctx, shd, (int [3]){ 16, 16, 1 });
+        /* Two bindings, each an array of 'planes' storage images updated from the context */
+        VulkanDescriptorSetBinding desc_i[2] = {
+            {
+                .name       = "input_img",
+                .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+                .mem_layout = "rgba8",
+                .mem_quali  = "readonly",
+                .dimensions = 2,
+                .elems      = planes,
+                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+                .updater    = s->input_images,
+            },
+            {
+                .name       = "output_img",
+                .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+                .mem_layout = "rgba8",
+                .mem_quali  = "writeonly",
+                .dimensions = 2,
+                .elems      = planes,
+                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+                .updater    = s->output_images,
+            },
+        };
+
+        RET(ff_vk_add_descriptor_set(ctx, shd, desc_i, 2, 0)); /* set 0 */
+        /* Shader text: the radii, plane count and plane mask are formatted in as literals */
+        GLSLF(0, #define FILTER_RADIUS ivec2(%i, %i),     s->size_x, s->size_y);
+        GLSLD(   blur_kernel                                                  );
+        GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y))   );
+        GLSLC(0, void main()                                                  );
+        GLSLC(0, {                                                            );
+        GLSLC(1,     ivec2 pos = ivec2(gl_GlobalInvocationID.xy);             );
+        GLSLF(1,     for (int i = 0; i < %i; i++) {,                    planes);
+        GLSLC(2,         if (!IS_WITHIN(pos, imageSize(input_img[i]))) {      );
+        GLSLC(3,             barrier();                                       );
+        GLSLC(3,             continue;                                        );
+        GLSLC(2,         }                                                    );
+        GLSLC(2,         else barrier();                                      ); /* Workaround */
+        GLSLF(2,         if ((0x%x & (1 << i)) != 0),                s->planes);
+        GLSLC(3,             blur_kernel(i, pos);                             );
+        GLSLC(2,         else                                                 );
+        GLSLC(3,             COPY_IMG(output_img[i], input_img[i], pos);      );
+        GLSLC(1,     }                                                        );
+        GLSLC(0, }                                                            );
+
+        RET(ff_vk_compile_shader(ctx, shd, "main"));
+    }
+
+    RET(ff_vk_init_pipeline_layout(ctx));
+
+    /* Execution context */
+    RET(av_vk_create_exec_ctx(s->vkctx.device, &s->exec,
+                              s->vkctx.hwctx->queue_family_comp_index));
+
+    /* The pipeline */
+    RET(ff_vk_init_compute_pipeline(ctx));
+
+    s->initialized = 1;
+
+    return 0;
+
+fail:
+    return err;
+}
+
+/**
+ * Run the compute pipeline once over the frame, reading in and writing out,
+ * and block until the GPU is done. Both images end up in the GENERAL layout.
+ * Returns a non-negative value on success, a negative AVERROR code otherwise.
+ */
+static int process_frames(AVFilterContext *avctx, AVVkFrame *out, AVVkFrame *in)
+{
+    int err = 0;
+    AvgBlurVulkanContext *s = avctx->priv;
+    int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+
+    VkCommandBufferBeginInfo cmd_start = {
+        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+    };
+
+    VkComponentMapping null_map = {
+        .r = VK_COMPONENT_SWIZZLE_IDENTITY, .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+        .b = VK_COMPONENT_SWIZZLE_IDENTITY, .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+    };
+
+    for (int i = 0; i < planes; i++) {
+        RET(ff_vk_create_imageview(avctx, &s->input_images[i].imageView, in,
+                                   ff_vk_plane_rep_fmt(s->vkctx.input_format, i),
+                                   ff_vk_aspect_flags(s->vkctx.input_format, i),
+                                   null_map, NULL));
+
+        RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, out,
+                                   ff_vk_plane_rep_fmt(s->vkctx.output_format, i),
+                                   ff_vk_aspect_flags(s->vkctx.output_format, i),
+                                   null_map, NULL));
+        /* Both descriptor arrays need the layout the images are moved to below */
+        s->input_images[i].imageLayout  = VK_IMAGE_LAYOUT_GENERAL;
+        s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
+    }
+
+    ff_vk_update_descriptor_set(avctx, 0);
+    vkBeginCommandBuffer(s->exec.buf, &cmd_start);
+
+    { /* Transition both images to the layout named in the descriptors */
+        VkImageMemoryBarrier bar[2] = {
+            {
+                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                .srcAccessMask = 0,
+                .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
+                .oldLayout = in->layout,
+                .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = in->img,
+                .subresourceRange.aspectMask = ff_vk_aspect_flags(s->vkctx.input_format, -1),
+                .subresourceRange.levelCount = 1,
+                .subresourceRange.layerCount = 1,
+            },
+            {
+                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                .srcAccessMask = 0,
+                .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
+                .oldLayout = out->layout,
+                .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = out->img,
+                .subresourceRange.aspectMask = ff_vk_aspect_flags(s->vkctx.output_format, -1),
+                .subresourceRange.levelCount = 1,
+                .subresourceRange.layerCount = 1,
+            },
+        };
+
+        vkCmdPipelineBarrier(s->exec.buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
+                            0, NULL, 0, NULL, 2, bar);
+
+        in->layout = bar[0].newLayout;
+        in->access = bar[0].dstAccessMask;
+        out->layout = bar[1].newLayout;
+        out->access = bar[1].dstAccessMask;
+    }
+
+    vkCmdBindPipeline(s->exec.buf, VK_PIPELINE_BIND_POINT_COMPUTE, s->vkctx.pipeline);
+    vkCmdBindDescriptorSets(s->exec.buf, VK_PIPELINE_BIND_POINT_COMPUTE, s->vkctx.pipeline_layout, 0, s->vkctx.descriptor_sets_num, s->vkctx.desc_set, 0, 0);
+    vkCmdDispatch(s->exec.buf,
+                  FFALIGN(s->vkctx.output_width,  s->vkctx.shaders[0].local_size[0])/s->vkctx.shaders[0].local_size[0],
+                  FFALIGN(s->vkctx.output_height, s->vkctx.shaders[0].local_size[1])/s->vkctx.shaders[0].local_size[1], 1);
+    vkEndCommandBuffer(s->exec.buf);
+
+    VkSubmitInfo s_info = {
+        .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+        .commandBufferCount   = 1,
+        .pCommandBuffers      = &s->exec.buf,
+    };
+
+    VkResult ret = vkQueueSubmit(s->exec.queue, 1, &s_info, s->exec.fence);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
+               av_vk_ret2str(ret));
+        err = AVERROR_EXTERNAL; /* fall through to fail: so the image views are released */
+    } else {
+        vkWaitForFences(s->vkctx.hwctx->act_dev, 1, &s->exec.fence, VK_TRUE, UINT64_MAX);
+        vkResetFences(s->vkctx.hwctx->act_dev, 1, &s->exec.fence);
+    }
+
+fail:
+    /* NOTE(review): on an early failure some views were not created by this call */
+    for (int i = 0; i < planes; i++) {
+        ff_vk_destroy_imageview(avctx, s->input_images[i].imageView);
+        ff_vk_destroy_imageview(avctx, s->output_images[i].imageView);
+    }
+
+    return err;
+}
+
+/* Input pad callback: filter one frame on the GPU and pass the result downstream. */
+static int avgblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
+{
+    int ret;
+    AVFilterContext *avctx = link->dst;
+    AVFilterLink *dst_link = avctx->outputs[0];
+    AvgBlurVulkanContext *priv = avctx->priv;
+    AVFrame *out = ff_get_video_buffer(dst_link, dst_link->w, dst_link->h);
+    if (!out) {
+        ret = AVERROR(ENOMEM);
+        goto error;
+    }
+
+    /* The shader and pipeline are built lazily, the first time a frame arrives */
+    if (!priv->initialized && (ret = init_filter(avctx, in)) < 0)
+        goto error;
+
+    ret = process_frames(avctx, (AVVkFrame *)out->data[0],
+                                (AVVkFrame *) in->data[0]);
+    if (ret >= 0)
+        ret = av_frame_copy_props(out, in);
+    if (ret < 0)
+        goto error;
+
+    av_frame_free(&in); /* the input frame is released on every path */
+    return ff_filter_frame(dst_link, out);
+
+error:
+    av_frame_free(&in);
+    av_frame_free(&out);
+    return ret;
+}
+
+static void avgblur_vulkan_uninit(AVFilterContext *avctx)
+{
+    AvgBlurVulkanContext *s = avctx->priv;
+    /* .uninit callback: releases the exec context and shader buffer, then the shared filter state */
+    av_vk_free_exec_ctx(s->vkctx.device, &s->exec);
+    av_vk_free_buf(s->vkctx.device, &s->shader_buf);
+    ff_vk_filter_uninit(avctx);
+    /* NOTE(review): may run before init_filter() ever did - confirm the frees above accept zeroed state */
+    s->initialized = 0;
+}
+
+#define OFFSET(x) offsetof(AvgBlurVulkanContext, x)
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+static const AVOption avgblur_vulkan_options[] = { /* user options; values land in AvgBlurVulkanContext */
+    { "sizeX",  "Set horizontal radius", OFFSET(size_x), AV_OPT_TYPE_INT, {.i64 = 2}, 0, 32, .flags = FLAGS },
+    { "planes", "Set planes to filter",  OFFSET(planes), AV_OPT_TYPE_INT, {.i64 = 0xF}, 0,  0xF, .flags = FLAGS },
+    { "sizeY",  "Set vertical radius",   OFFSET(size_y), AV_OPT_TYPE_INT, {.i64 = 2}, 0, 32, .flags = FLAGS },
+    { NULL },
+}; /* NOTE(review): radius 32 with the 16x16 workgroup needs a (16+64)^2 vec4 shared cache (if AREA() is x*y) - check device limits */
+/* Defines avgblur_vulkan_class, which the filter's .priv_class points at */
+AVFILTER_DEFINE_CLASS(avgblur_vulkan);
+
+static const AVFilterPad avgblur_vulkan_inputs[] = { /* the filter's single video input pad */
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = &avgblur_vulkan_filter_frame, /* per-frame entry point defined in this file */
+        .config_props = &ff_vk_filter_config_input,   /* helper defined outside this file */
+    },
+    { NULL } /* end-of-list marker */
+};
+
+static const AVFilterPad avgblur_vulkan_outputs[] = { /* the filter's single video output pad */
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+        .config_props = &ff_vk_filter_config_output, /* helper defined outside this file */
+    },
+    { NULL } /* end-of-list marker */
+};
+
+AVFilter ff_vf_avgblur_vulkan = { /* filter definition; registered as avgblur_vulkan in allfilters.c */
+    .name           = "avgblur_vulkan",
+    .description    = NULL_IF_CONFIG_SMALL("Apply avgblur mask to input video"),
+    .priv_size      = sizeof(AvgBlurVulkanContext),
+    .init           = &ff_vk_filter_init,          /* shared init; the shader itself is built later by init_filter() */
+    .uninit         = &avgblur_vulkan_uninit,
+    .query_formats  = &ff_vk_filter_query_formats,
+    .inputs         = avgblur_vulkan_inputs,
+    .outputs        = avgblur_vulkan_outputs,
+    .priv_class     = &avgblur_vulkan_class,       /* generated by AVFILTER_DEFINE_CLASS(avgblur_vulkan) */
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
-- 
2.16.3



More information about the ffmpeg-devel mailing list