[FFmpeg-devel] [PATCH 3/6] lavfi: add a Vulkan avgblur filter

Mark Thompson sw at jkqxz.net
Sun Sep 2 23:40:58 EEST 2018


On 21/06/18 17:55, Rostislav Pehlivanov wrote:
> Signed-off-by: Rostislav Pehlivanov <atomnuker at gmail.com>
> ---
>  configure                       |   1 +
>  libavfilter/Makefile            |   1 +
>  libavfilter/allfilters.c        |   1 +
>  libavfilter/vf_avgblur_vulkan.c | 343 ++++++++++++++++++++++++++++++++
>  4 files changed, 346 insertions(+)
>  create mode 100644 libavfilter/vf_avgblur_vulkan.c

This filter seems to hang every time it is run on current ANV (Mesa's Intel Vulkan driver):

$ ./ffmpeg_g -v 55 -y -i in.mp4 -an -init_hw_device vulkan=v:'Intel(R) UHD Graphics 630 (Coffeelake 3x8 GT2)' -filter_hw_device v -vf 'hwupload,avgblur_vulkan,hwdownload' -c:v libx264 -frames:v 1000 out.mp4
...
[Parsed_avgblur_vulkan_1 @ 0x55f1ba284ac0] Shader linked! Size: 3464 bytes
INTEL-MESA: error: ../../../src/intel/vulkan/anv_device.c:2004: GPU hung on one of our command buffers (VK_ERROR_DEVICE_LOST)
[AVHWDeviceContext @ 0x55f1b9219b00] Unable to submit command buffer: VK_ERROR_DEVICE_LOST

with kernel log "[drm] GPU HANG: ecode 9:0:0x8ed9fff2, in ffmpeg_g [6451], reason: Hang on rcs0, action: reset".

It runs on RADV and the output looks plausible, but it's nondeterministic somehow (checksums never match).  I think that means the shader program must be racing or contain some undefined behaviour.

> ...
> +
> +/**
> + * Record and submit one compute dispatch that filters `in` into `out`.
> + *
> + * Creates one image view per plane for both frames, transitions both images
> + * to the layouts the shader is given, records a single compute dispatch
> + * covering the output dimensions, submits it, and blocks on the execution
> + * fence. The per-plane image views are destroyed before returning, on the
> + * success path and on every error path.
> + *
> + * @param avctx filter context; avctx->priv is the AvgBlurVulkanContext
> + * @param out   destination frame; moved to VK_IMAGE_LAYOUT_GENERAL
> + * @param in    source frame; moved to VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
> + * @return the value left in err by the last RET() call on success,
> + *         AVERROR_EXTERNAL if the queue submission fails, or whatever
> + *         error RET() stored before jumping to the cleanup label
> + *
> + * NOTE(review): RET() is defined outside this hunk; it is assumed to store
> + * the call result in err and `goto fail` on error - confirm.
> + */
> +static int process_frames(AVFilterContext *avctx, AVVkFrame *out, AVVkFrame *in)
> +{
> +    /* Initialised so that a format with zero planes cannot return garbage. */
> +    int err = 0;
> +    VkResult ret;
> +    AvgBlurVulkanContext *s = avctx->priv;
> +    int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
> +
> +    VkCommandBufferBeginInfo cmd_start = {
> +        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
> +        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
> +    };
> +
> +    /* Declared up front so that `goto fail` never jumps over it. */
> +    VkSubmitInfo s_info = {
> +        .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
> +        .commandBufferCount   = 1,
> +        .pCommandBuffers      = &s->exec.buf,
> +    };
> +
> +    VkComponentMapping null_map = {
> +        .r = VK_COMPONENT_SWIZZLE_IDENTITY,
> +        .g = VK_COMPONENT_SWIZZLE_IDENTITY,
> +        .b = VK_COMPONENT_SWIZZLE_IDENTITY,
> +        .a = VK_COMPONENT_SWIZZLE_IDENTITY,
> +    };
> +
> +    /* One view per plane for each frame, using the per-plane format. */
> +    for (int i = 0; i < planes; i++) {
> +        RET(ff_vk_create_imageview(avctx, &s->input_images[i].imageView, in,
> +                                   ff_vk_plane_rep_fmt(s->vkctx.input_format, i),
> +                                   ff_vk_aspect_flags(s->vkctx.input_format, i),
> +                                   null_map, NULL));
> +
> +        RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, out,
> +                                   ff_vk_plane_rep_fmt(s->vkctx.output_format, i),
> +                                   ff_vk_aspect_flags(s->vkctx.output_format, i),
> +                                   null_map, NULL));
> +
> +        s->input_images[i].imageLayout  = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
> +        s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
> +    }

Approximately this fragment seems to be common between all the filters - maybe it should be abstracted into vulkan.c?

> +
> +    ff_vk_update_descriptor_set(avctx, 0);
> +
> +    vkBeginCommandBuffer(s->exec.buf, &cmd_start);
> +
> +    {
> +        /* bar[0] transitions the input image, bar[1] the output image. */
> +        VkImageMemoryBarrier bar[2] = {
> +            {
> +                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
> +                .srcAccessMask = 0,
> +                .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
> +                .oldLayout = in->layout,
> +                .newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
> +                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
> +                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
> +                .image = in->img,
> +                .subresourceRange.aspectMask = ff_vk_aspect_flags(s->vkctx.input_format, -1),
> +                .subresourceRange.levelCount = 1,
> +                .subresourceRange.layerCount = 1,
> +            },
> +            {
> +                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
> +                .srcAccessMask = 0,
> +                .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
> +                .oldLayout = out->layout,
> +                .newLayout = VK_IMAGE_LAYOUT_GENERAL,
> +                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
> +                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
> +                .image = out->img,
> +                .subresourceRange.aspectMask = ff_vk_aspect_flags(s->vkctx.output_format, -1),
> +                .subresourceRange.levelCount = 1,
> +                .subresourceRange.layerCount = 1,
> +            },
> +        };
> +
> +        vkCmdPipelineBarrier(s->exec.buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
> +                            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
> +                            0, NULL, 0, NULL, 2, bar);
> +
> +        /* Record the new state on the frames so later users see it. */
> +        in->layout  = bar[0].newLayout;
> +        in->access  = bar[0].dstAccessMask;
> +
> +        out->layout = bar[1].newLayout;
> +        out->access = bar[1].dstAccessMask;
> +    }
> +
> +    vkCmdBindPipeline(s->exec.buf, VK_PIPELINE_BIND_POINT_COMPUTE, s->vkctx.pipeline);
> +    vkCmdBindDescriptorSets(s->exec.buf, VK_PIPELINE_BIND_POINT_COMPUTE, s->vkctx.pipeline_layout, 0, s->vkctx.descriptor_sets_num, s->vkctx.desc_set, 0, NULL);
> +    /* Group counts are the output size divided by the local size, rounded up. */
> +    vkCmdDispatch(s->exec.buf,
> +                  FFALIGN(s->vkctx.output_width,  s->vkctx.shaders[0].local_size[0])/s->vkctx.shaders[0].local_size[0],
> +                  FFALIGN(s->vkctx.output_height, s->vkctx.shaders[0].local_size[1])/s->vkctx.shaders[0].local_size[1], 1);
> +
> +    vkEndCommandBuffer(s->exec.buf);
> +
> +    ret = vkQueueSubmit(s->exec.queue, 1, &s_info, s->exec.fence);
> +    if (ret != VK_SUCCESS) {
> +        av_log(avctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
> +               ff_vk_ret2str(ret));
> +        /* Go through the common cleanup rather than returning directly,
> +         * otherwise the image views created above would be leaked. */
> +        err = AVERROR_EXTERNAL;
> +        goto fail;
> +    }
> +
> +    /* Block until the submission has completed, then re-arm the fence. */
> +    vkWaitForFences(s->vkctx.hwctx->act_dev, 1, &s->exec.fence, VK_TRUE, UINT64_MAX);
> +    vkResetFences(s->vkctx.hwctx->act_dev, 1, &s->exec.fence);
> +
> +fail:
> +
> +    /* NOTE(review): this destroys the views of every plane even when the
> +     * creation loop stopped early - confirm ff_vk_destroy_imageview copes
> +     * with handles that were never created for this frame. */
> +    for (int i = 0; i < planes; i++) {
> +        ff_vk_destroy_imageview(avctx, s->input_images[i].imageView);
> +        ff_vk_destroy_imageview(avctx, s->output_images[i].imageView);
> +    }
> +
> +    return err;
> +}
> +


More information about the ffmpeg-devel mailing list