[FFmpeg-cvslog] lavfi/vulkan: refactor, fix and fully implement multiple queues

Lynne git at videolan.org
Fri Nov 12 06:52:46 EET 2021


ffmpeg | branch: master | Lynne <dev at lynne.ee> | Wed Nov 10 03:50:54 2021 +0100| [f705e9ea0567c8dcf800ae1ee0647fca157c6199] | committer: Lynne

lavfi/vulkan: refactor, fix and fully implement multiple queues

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f705e9ea0567c8dcf800ae1ee0647fca157c6199
---

 libavfilter/vf_avgblur_vulkan.c   |  27 +--
 libavfilter/vf_chromaber_vulkan.c |  25 +--
 libavfilter/vf_overlay_vulkan.c   |  30 ++--
 libavfilter/vf_scale_vulkan.c     |  27 +--
 libavfilter/vulkan.c              | 340 +++++++++++++++++++++++---------------
 libavfilter/vulkan.h              | 103 +++++++-----
 6 files changed, 336 insertions(+), 216 deletions(-)

diff --git a/libavfilter/vf_avgblur_vulkan.c b/libavfilter/vf_avgblur_vulkan.c
index bf02dab1db..4795e482a9 100644
--- a/libavfilter/vf_avgblur_vulkan.c
+++ b/libavfilter/vf_avgblur_vulkan.c
@@ -24,12 +24,13 @@
 #define CGS 32
 
 typedef struct AvgBlurVulkanContext {
-    VulkanFilterContext vkctx;
+    FFVulkanContext vkctx;
 
     int initialized;
+    FFVkQueueFamilyCtx qf;
     FFVkExecContext *exec;
-    VulkanPipeline *pl_hor;
-    VulkanPipeline *pl_ver;
+    FFVulkanPipeline *pl_hor;
+    FFVulkanPipeline *pl_ver;
 
     /* Shader updators, must be in the main filter struct */
     VkDescriptorImageInfo input_images[3];
@@ -73,16 +74,14 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
     FFSPIRVShader *shd;
     AvgBlurVulkanContext *s = ctx->priv;
     const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-    VkSampler *sampler = ff_vk_init_sampler(ctx, 1, VK_FILTER_LINEAR);
 
-    VulkanDescriptorSetBinding desc_i[2] = {
+    FFVulkanDescriptorSetBinding desc_i[2] = {
         {
             .name       = "input_img",
             .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
             .dimensions = 2,
             .elems      = planes,
             .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
-            .samplers   = DUP_SAMPLER_ARRAY4(*sampler),
         },
         {
             .name       = "output_img",
@@ -95,17 +94,17 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
         },
     };
 
-    if (!sampler)
-        return AVERROR_EXTERNAL;
+    ff_vk_qf_init(ctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
 
-    s->vkctx.queue_family_idx = s->vkctx.hwctx->queue_family_comp_index;
-    s->vkctx.queue_count = s->vkctx.hwctx->nb_comp_queues;
+    desc_i[0].sampler = ff_vk_init_sampler(ctx, 1, VK_FILTER_LINEAR);
+    if (!desc_i[0].sampler)
+        return AVERROR_EXTERNAL;
 
     { /* Create shader for the horizontal pass */
         desc_i[0].updater = s->input_images;
         desc_i[1].updater = s->tmp_images;
 
-        s->pl_hor = ff_vk_create_pipeline(ctx);
+        s->pl_hor = ff_vk_create_pipeline(ctx, &s->qf);
         if (!s->pl_hor)
             return AVERROR(ENOMEM);
 
@@ -148,7 +147,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
         desc_i[0].updater = s->tmp_images;
         desc_i[1].updater = s->output_images;
 
-        s->pl_ver = ff_vk_create_pipeline(ctx);
+        s->pl_ver = ff_vk_create_pipeline(ctx, &s->qf);
         if (!s->pl_ver)
             return AVERROR(ENOMEM);
 
@@ -188,7 +187,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
     }
 
     /* Execution context */
-    RET(ff_vk_create_exec_ctx(ctx, &s->exec));
+    RET(ff_vk_create_exec_ctx(ctx, &s->exec, &s->qf));
 
     s->initialized = 1;
 
@@ -311,6 +310,8 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *tmp_f
     if (err)
         return err;
 
+    ff_vk_qf_rotate(&s->qf);
+
     return err;
 
 fail:
diff --git a/libavfilter/vf_chromaber_vulkan.c b/libavfilter/vf_chromaber_vulkan.c
index 25ef20fe73..83ab72f716 100644
--- a/libavfilter/vf_chromaber_vulkan.c
+++ b/libavfilter/vf_chromaber_vulkan.c
@@ -24,11 +24,12 @@
 #define CGROUPS (int [3]){ 32, 32, 1 }
 
 typedef struct ChromaticAberrationVulkanContext {
-    VulkanFilterContext vkctx;
+    FFVulkanContext vkctx;
 
     int initialized;
+    FFVkQueueFamilyCtx qf;
     FFVkExecContext *exec;
-    VulkanPipeline *pl;
+    FFVulkanPipeline *pl;
 
     /* Shader updators, must be in the main filter struct */
     VkDescriptorImageInfo input_images[3];
@@ -67,17 +68,18 @@ static const char distort_chroma_kernel[] = {
 static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
 {
     int err;
+    FFVkSampler *sampler;
     ChromaticAberrationVulkanContext *s = ctx->priv;
+    const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+
+    ff_vk_qf_init(ctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
 
     /* Create a sampler */
-    VkSampler *sampler = ff_vk_init_sampler(ctx, 0, VK_FILTER_LINEAR);
+    sampler = ff_vk_init_sampler(ctx, 0, VK_FILTER_LINEAR);
     if (!sampler)
         return AVERROR_EXTERNAL;
 
-    s->vkctx.queue_family_idx = s->vkctx.hwctx->queue_family_comp_index;
-    s->vkctx.queue_count = s->vkctx.hwctx->nb_comp_queues;
-
-    s->pl = ff_vk_create_pipeline(ctx);
+    s->pl = ff_vk_create_pipeline(ctx, &s->qf);
     if (!s->pl)
         return AVERROR(ENOMEM);
 
@@ -86,8 +88,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
     s->opts.dist[1] = (s->opts.dist[1] / 100.0f) + 1.0f;
 
     { /* Create the shader */
-        const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-        VulkanDescriptorSetBinding desc_i[2] = {
+        FFVulkanDescriptorSetBinding desc_i[2] = {
             {
                 .name       = "input_img",
                 .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
@@ -95,7 +96,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
                 .elems      = planes,
                 .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
                 .updater    = s->input_images,
-                .samplers   = DUP_SAMPLER_ARRAY4(*sampler),
+                .sampler    = sampler,
             },
             {
                 .name       = "output_img",
@@ -158,7 +159,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
     RET(ff_vk_init_compute_pipeline(ctx, s->pl));
 
     /* Execution context */
-    RET(ff_vk_create_exec_ctx(ctx, &s->exec));
+    RET(ff_vk_create_exec_ctx(ctx, &s->exec, &s->qf));
 
     s->initialized = 1;
 
@@ -256,6 +257,8 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
     if (err)
         return err;
 
+    ff_vk_qf_rotate(&s->qf);
+
     return err;
 
 fail:
diff --git a/libavfilter/vf_overlay_vulkan.c b/libavfilter/vf_overlay_vulkan.c
index f08800af2c..b902ad83f5 100644
--- a/libavfilter/vf_overlay_vulkan.c
+++ b/libavfilter/vf_overlay_vulkan.c
@@ -25,11 +25,12 @@
 #define CGROUPS (int [3]){ 32, 32, 1 }
 
 typedef struct OverlayVulkanContext {
-    VulkanFilterContext vkctx;
+    FFVulkanContext vkctx;
 
     int initialized;
-    VulkanPipeline *pl;
+    FFVkQueueFamilyCtx qf;
     FFVkExecContext *exec;
+    FFVulkanPipeline *pl;
     FFFrameSync fs;
     FFVkBuffer params_buf;
 
@@ -79,23 +80,24 @@ static const char overlay_alpha[] = {
 static av_cold int init_filter(AVFilterContext *ctx)
 {
     int err;
+    FFVkSampler *sampler;
     OverlayVulkanContext *s = ctx->priv;
-    VkSampler *sampler = ff_vk_init_sampler(ctx, 1, VK_FILTER_NEAREST);
+    const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+
+    ff_vk_qf_init(ctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
+
+    sampler = ff_vk_init_sampler(ctx, 1, VK_FILTER_NEAREST);
     if (!sampler)
         return AVERROR_EXTERNAL;
 
-    s->pl = ff_vk_create_pipeline(ctx);
+    s->pl = ff_vk_create_pipeline(ctx, &s->qf);
     if (!s->pl)
         return AVERROR(ENOMEM);
 
-    s->vkctx.queue_family_idx = s->vkctx.hwctx->queue_family_comp_index;
-    s->vkctx.queue_count = s->vkctx.hwctx->nb_comp_queues;
-
     { /* Create the shader */
-        const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
         const int ialpha = av_pix_fmt_desc_get(s->vkctx.input_format)->flags & AV_PIX_FMT_FLAG_ALPHA;
 
-        VulkanDescriptorSetBinding desc_i[3] = {
+        FFVulkanDescriptorSetBinding desc_i[3] = {
             {
                 .name       = "main_img",
                 .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
@@ -103,7 +105,7 @@ static av_cold int init_filter(AVFilterContext *ctx)
                 .elems      = planes,
                 .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
                 .updater    = s->main_images,
-                .samplers   = DUP_SAMPLER_ARRAY4(*sampler),
+                .sampler    = sampler,
             },
             {
                 .name       = "overlay_img",
@@ -112,7 +114,7 @@ static av_cold int init_filter(AVFilterContext *ctx)
                 .elems      = planes,
                 .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
                 .updater    = s->overlay_images,
-                .samplers   = DUP_SAMPLER_ARRAY4(*sampler),
+                .sampler    = sampler,
             },
             {
                 .name       = "output_img",
@@ -126,7 +128,7 @@ static av_cold int init_filter(AVFilterContext *ctx)
             },
         };
 
-        VulkanDescriptorSetBinding desc_b = {
+        FFVulkanDescriptorSetBinding desc_b = {
             .name        = "params",
             .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
             .mem_quali   = "readonly",
@@ -215,7 +217,7 @@ static av_cold int init_filter(AVFilterContext *ctx)
     }
 
     /* Execution context */
-    RET(ff_vk_create_exec_ctx(ctx, &s->exec));
+    RET(ff_vk_create_exec_ctx(ctx, &s->exec, &s->qf));
 
     s->initialized = 1;
 
@@ -339,6 +341,8 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f,
     if (err)
         return err;
 
+    ff_vk_qf_rotate(&s->qf);
+
     return err;
 
 fail:
diff --git a/libavfilter/vf_scale_vulkan.c b/libavfilter/vf_scale_vulkan.c
index 680e9c2f4f..3a2251f8df 100644
--- a/libavfilter/vf_scale_vulkan.c
+++ b/libavfilter/vf_scale_vulkan.c
@@ -33,11 +33,12 @@ enum ScalerFunc {
 };
 
 typedef struct ScaleVulkanContext {
-    VulkanFilterContext vkctx;
+    FFVulkanContext vkctx;
 
     int initialized;
+    FFVkQueueFamilyCtx qf;
     FFVkExecContext *exec;
-    VulkanPipeline *pl;
+    FFVulkanPipeline *pl;
     FFVkBuffer params_buf;
 
     /* Shader updators, must be in the main filter struct */
@@ -107,7 +108,7 @@ static const char write_444[] = {
 static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
 {
     int err;
-    VkSampler *sampler;
+    FFVkSampler *sampler;
     VkFilter sampler_mode;
     ScaleVulkanContext *s = ctx->priv;
 
@@ -115,9 +116,9 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
     int crop_y = in->crop_top;
     int crop_w = in->width - (in->crop_left + in->crop_right);
     int crop_h = in->height - (in->crop_top + in->crop_bottom);
+    int in_planes = av_pix_fmt_count_planes(s->vkctx.input_format);
 
-    s->vkctx.queue_family_idx = s->vkctx.hwctx->queue_family_comp_index;
-    s->vkctx.queue_count = s->vkctx.hwctx->nb_comp_queues;
+    ff_vk_qf_init(ctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
 
     switch (s->scaler) {
     case F_NEAREST:
@@ -133,20 +134,20 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
     if (!sampler)
         return AVERROR_EXTERNAL;
 
-    s->pl = ff_vk_create_pipeline(ctx);
+    s->pl = ff_vk_create_pipeline(ctx, &s->qf);
     if (!s->pl)
         return AVERROR(ENOMEM);
 
     { /* Create the shader */
-        VulkanDescriptorSetBinding desc_i[2] = {
+        FFVulkanDescriptorSetBinding desc_i[2] = {
             {
                 .name       = "input_img",
                 .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
                 .dimensions = 2,
-                .elems      = av_pix_fmt_count_planes(s->vkctx.input_format),
+                .elems      = in_planes,
                 .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
                 .updater    = s->input_images,
-                .samplers   = DUP_SAMPLER_ARRAY4(*sampler),
+                .sampler    = sampler,
             },
             {
                 .name       = "output_img",
@@ -160,7 +161,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
             },
         };
 
-        VulkanDescriptorSetBinding desc_b = {
+        FFVulkanDescriptorSetBinding desc_b = {
             .name        = "params",
             .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
             .mem_quali   = "readonly",
@@ -178,7 +179,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
         ff_vk_set_compute_shader_sizes(ctx, shd, CGROUPS);
 
         RET(ff_vk_add_descriptor_set(ctx, s->pl, shd,  desc_i, 2, 0)); /* set 0 */
-        RET(ff_vk_add_descriptor_set(ctx, s->pl, shd, &desc_b, 1, 0)); /* set 0 */
+        RET(ff_vk_add_descriptor_set(ctx, s->pl, shd, &desc_b, 1, 0)); /* set 1 */
 
         GLSLD(   scale_bilinear                                                  );
 
@@ -280,7 +281,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
     }
 
     /* Execution context */
-    RET(ff_vk_create_exec_ctx(ctx, &s->exec));
+    RET(ff_vk_create_exec_ctx(ctx, &s->exec, &s->qf));
 
     s->initialized = 1;
 
@@ -384,6 +385,8 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
     if (err)
         return err;
 
+    ff_vk_qf_rotate(&s->qf);
+
     return err;
 
 fail:
diff --git a/libavfilter/vulkan.c b/libavfilter/vulkan.c
index c9a2ae4593..a30699963e 100644
--- a/libavfilter/vulkan.c
+++ b/libavfilter/vulkan.c
@@ -20,6 +20,7 @@
 #include "vulkan.h"
 #include "glslang.h"
 
+#include "libavutil/avassert.h"
 #include "libavutil/vulkan_loader.h"
 
 /* Generic macro for creating contexts which need to keep their addresses
@@ -88,15 +89,54 @@ const char *ff_vk_ret2str(VkResult res)
 #undef CASE
 }
 
+void ff_vk_qf_init(AVFilterContext *avctx, FFVkQueueFamilyCtx *qf,
+                   VkQueueFlagBits dev_family, int queue_limit)
+{
+    FFVulkanContext *s = avctx->priv;
+
+    if (!queue_limit)
+        queue_limit = INT32_MAX;
+
+    switch (dev_family) {
+    case VK_QUEUE_GRAPHICS_BIT:
+        qf->queue_family = s->hwctx->queue_family_index;
+        qf->nb_queues = FFMIN(s->hwctx->nb_graphics_queues, queue_limit);
+        return;
+    case VK_QUEUE_COMPUTE_BIT:
+        qf->queue_family = s->hwctx->queue_family_comp_index;
+        qf->nb_queues = FFMIN(s->hwctx->nb_comp_queues, queue_limit);
+        return;
+    case VK_QUEUE_TRANSFER_BIT:
+        qf->queue_family = s->hwctx->queue_family_tx_index;
+        qf->nb_queues = FFMIN(s->hwctx->nb_tx_queues, queue_limit);
+        return;
+    case VK_QUEUE_VIDEO_ENCODE_BIT_KHR:
+        qf->queue_family = s->hwctx->queue_family_encode_index;
+        qf->nb_queues = FFMIN(s->hwctx->nb_encode_queues, queue_limit);
+        return;
+    case VK_QUEUE_VIDEO_DECODE_BIT_KHR:
+        qf->queue_family = s->hwctx->queue_family_decode_index;
+        qf->nb_queues = FFMIN(s->hwctx->nb_decode_queues, queue_limit);
+        return;
+    default:
+        av_assert0(0); /* Should never happen */
+    }
+
+    return;
+}
+
+void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf)
+{
+    qf->cur_queue = (qf->cur_queue + 1) % qf->nb_queues;
+}
+
 static int vk_alloc_mem(AVFilterContext *avctx, VkMemoryRequirements *req,
                         VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
                         VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
 {
     VkResult ret;
     int index = -1;
-    VkPhysicalDeviceProperties props;
-    VkPhysicalDeviceMemoryProperties mprops;
-    VulkanFilterContext *s = avctx->priv;
+    FFVulkanContext *s = avctx->priv;
     FFVulkanFunctions *vk = &s->vkfn;
 
     VkMemoryAllocateInfo alloc_info = {
@@ -104,24 +144,21 @@ static int vk_alloc_mem(AVFilterContext *avctx, VkMemoryRequirements *req,
         .pNext           = alloc_extension,
     };
 
-    vk->GetPhysicalDeviceProperties(s->hwctx->phys_dev, &props);
-    vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &mprops);
-
     /* Align if we need to */
     if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
-        req->size = FFALIGN(req->size, props.limits.minMemoryMapAlignment);
+        req->size = FFALIGN(req->size, s->props.limits.minMemoryMapAlignment);
 
     alloc_info.allocationSize = req->size;
 
     /* The vulkan spec requires memory types to be sorted in the "optimal"
      * order, so the first matching type we find will be the best/fastest one */
-    for (int i = 0; i < mprops.memoryTypeCount; i++) {
+    for (int i = 0; i < s->mprops.memoryTypeCount; i++) {
         /* The memory type must be supported by the requirements (bitfield) */
         if (!(req->memoryTypeBits & (1 << i)))
             continue;
 
         /* The memory type flags must include our properties */
-        if ((mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
+        if ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
             continue;
 
         /* Found a suitable memory type */
@@ -145,7 +182,7 @@ static int vk_alloc_mem(AVFilterContext *avctx, VkMemoryRequirements *req,
         return AVERROR(ENOMEM);
     }
 
-    *mem_flags |= mprops.memoryTypes[index].propertyFlags;
+    *mem_flags |= s->mprops.memoryTypes[index].propertyFlags;
 
     return 0;
 }
@@ -156,7 +193,7 @@ int ff_vk_create_buf(AVFilterContext *avctx, FFVkBuffer *buf, size_t size,
     int err;
     VkResult ret;
     int use_ded_mem;
-    VulkanFilterContext *s = avctx->priv;
+    FFVulkanContext *s = avctx->priv;
     FFVulkanFunctions *vk = &s->vkfn;
 
     VkBufferCreateInfo buf_spawn = {
@@ -220,7 +257,7 @@ int ff_vk_map_buffers(AVFilterContext *avctx, FFVkBuffer *buf, uint8_t *mem[],
                       int nb_buffers, int invalidate)
 {
     VkResult ret;
-    VulkanFilterContext *s = avctx->priv;
+    FFVulkanContext *s = avctx->priv;
     FFVulkanFunctions *vk = &s->vkfn;
     VkMappedMemoryRange *inval_list = NULL;
     int inval_count = 0;
@@ -271,7 +308,7 @@ int ff_vk_unmap_buffers(AVFilterContext *avctx, FFVkBuffer *buf, int nb_buffers,
 {
     int err = 0;
     VkResult ret;
-    VulkanFilterContext *s = avctx->priv;
+    FFVulkanContext *s = avctx->priv;
     FFVulkanFunctions *vk = &s->vkfn;
     VkMappedMemoryRange *flush_list = NULL;
     int flush_count = 0;
@@ -311,7 +348,7 @@ int ff_vk_unmap_buffers(AVFilterContext *avctx, FFVkBuffer *buf, int nb_buffers,
 
 void ff_vk_free_buf(AVFilterContext *avctx, FFVkBuffer *buf)
 {
-    VulkanFilterContext *s = avctx->priv;
+    FFVulkanContext *s = avctx->priv;
     FFVulkanFunctions *vk = &s->vkfn;
 
     if (!buf)
@@ -323,7 +360,7 @@ void ff_vk_free_buf(AVFilterContext *avctx, FFVkBuffer *buf)
         vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
 }
 
-int ff_vk_add_push_constant(AVFilterContext *avctx, VulkanPipeline *pl,
+int ff_vk_add_push_constant(AVFilterContext *avctx, FFVulkanPipeline *pl,
                             int offset, int size, VkShaderStageFlagBits stage)
 {
     VkPushConstantRange *pc;
@@ -343,37 +380,37 @@ int ff_vk_add_push_constant(AVFilterContext *avctx, VulkanPipeline *pl,
     return 0;
 }
 
-FN_CREATING(VulkanFilterContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num)
-int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx)
+FN_CREATING(FFVulkanContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num)
+int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx,
+                          FFVkQueueFamilyCtx *qf)
 {
     VkResult ret;
     FFVkExecContext *e;
-    VulkanFilterContext *s = avctx->priv;
+    FFVulkanContext *s = avctx->priv;
     FFVulkanFunctions *vk = &s->vkfn;
 
-    int queue_family = s->queue_family_idx;
-    int nb_queues = s->queue_count;
-
     VkCommandPoolCreateInfo cqueue_create = {
         .sType              = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
         .flags              = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
-        .queueFamilyIndex   = queue_family,
+        .queueFamilyIndex   = qf->queue_family,
     };
     VkCommandBufferAllocateInfo cbuf_create = {
         .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
         .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
-        .commandBufferCount = nb_queues,
+        .commandBufferCount = qf->nb_queues,
     };
 
     e = create_exec_ctx(s);
     if (!e)
         return AVERROR(ENOMEM);
 
-    e->queues = av_mallocz(nb_queues * sizeof(*e->queues));
+    e->qf = qf;
+
+    e->queues = av_mallocz(qf->nb_queues * sizeof(*e->queues));
     if (!e->queues)
         return AVERROR(ENOMEM);
 
-    e->bufs = av_mallocz(nb_queues * sizeof(*e->bufs));
+    e->bufs = av_mallocz(qf->nb_queues * sizeof(*e->bufs));
     if (!e->bufs)
         return AVERROR(ENOMEM);
 
@@ -396,9 +433,9 @@ int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx)
         return AVERROR_EXTERNAL;
     }
 
-    for (int i = 0; i < nb_queues; i++) {
+    for (int i = 0; i < qf->nb_queues; i++) {
         FFVkQueueCtx *q = &e->queues[i];
-        vk->GetDeviceQueue(s->hwctx->act_dev, queue_family, i, &q->queue);
+        vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family, i, &q->queue);
     }
 
     *ctx = e;
@@ -408,8 +445,7 @@ int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx)
 
 void ff_vk_discard_exec_deps(AVFilterContext *avctx, FFVkExecContext *e)
 {
-    VulkanFilterContext *s = avctx->priv;
-    FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];
+    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
 
     for (int j = 0; j < q->nb_buf_deps; j++)
         av_buffer_unref(&q->buf_deps[j]);
@@ -426,9 +462,9 @@ void ff_vk_discard_exec_deps(AVFilterContext *avctx, FFVkExecContext *e)
 int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e)
 {
     VkResult ret;
-    VulkanFilterContext *s = avctx->priv;
+    FFVulkanContext *s = avctx->priv;
     FFVulkanFunctions *vk = &s->vkfn;
-    FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];
+    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
 
     VkCommandBufferBeginInfo cmd_start = {
         .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
@@ -455,7 +491,7 @@ int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e)
     /* Discard queue dependencies */
     ff_vk_discard_exec_deps(avctx, e);
 
-    ret = vk->BeginCommandBuffer(e->bufs[s->cur_queue_idx], &cmd_start);
+    ret = vk->BeginCommandBuffer(e->bufs[e->qf->cur_queue], &cmd_start);
     if (ret != VK_SUCCESS) {
         av_log(avctx, AV_LOG_ERROR, "Failed to start command recoding: %s\n",
                ff_vk_ret2str(ret));
@@ -467,17 +503,15 @@ int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e)
 
 VkCommandBuffer ff_vk_get_exec_buf(AVFilterContext *avctx, FFVkExecContext *e)
 {
-    VulkanFilterContext *s = avctx->priv;
-    return e->bufs[s->cur_queue_idx];
+    return e->bufs[e->qf->cur_queue];
 }
 
 int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e,
                        AVFrame *frame, VkPipelineStageFlagBits in_wait_dst_flag)
 {
     AVFrame **dst;
-    VulkanFilterContext *s = avctx->priv;
     AVVkFrame *f = (AVVkFrame *)frame->data[0];
-    FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];
+    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
     AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data;
     int planes = av_pix_fmt_count_planes(fc->sw_format);
 
@@ -517,16 +551,21 @@ int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e,
             return AVERROR(ENOMEM);
         }
 
+        e->sem_sig_val_dst = av_fast_realloc(e->sem_sig_val_dst, &e->sem_sig_val_dst_alloc,
+                                             (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val_dst));
+        if (!e->sem_sig_val_dst) {
+            ff_vk_discard_exec_deps(avctx, e);
+            return AVERROR(ENOMEM);
+        }
+
         e->sem_wait[e->sem_wait_cnt] = f->sem[i];
         e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag;
         e->sem_wait_val[e->sem_wait_cnt] = f->sem_value[i];
         e->sem_wait_cnt++;
 
-        /* TODO: fix this in case execution fails */
-        f->sem_value[i]++;
-
         e->sem_sig[e->sem_sig_cnt] = f->sem[i];
-        e->sem_sig_val[e->sem_sig_cnt] = f->sem_value[i];
+        e->sem_sig_val[e->sem_sig_cnt] = f->sem_value[i] + 1;
+        e->sem_sig_val_dst[e->sem_sig_cnt] = &f->sem_value[i];
         e->sem_sig_cnt++;
     }
 
@@ -551,9 +590,9 @@ int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e,
 int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e)
 {
     VkResult ret;
-    VulkanFilterContext *s = avctx->priv;
+    FFVulkanContext *s = avctx->priv;
     FFVulkanFunctions *vk = &s->vkfn;
-    FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];
+    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
 
     VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
         .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
@@ -568,7 +607,7 @@ int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e)
         .pNext                = &s_timeline_sem_info,
 
         .commandBufferCount   = 1,
-        .pCommandBuffers      = &e->bufs[s->cur_queue_idx],
+        .pCommandBuffers      = &e->bufs[e->qf->cur_queue],
 
         .pWaitSemaphores      = e->sem_wait,
         .pWaitDstStageMask    = e->sem_wait_dst,
@@ -578,7 +617,7 @@ int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e)
         .signalSemaphoreCount = e->sem_sig_cnt,
     };
 
-    ret = vk->EndCommandBuffer(e->bufs[s->cur_queue_idx]);
+    ret = vk->EndCommandBuffer(e->bufs[e->qf->cur_queue]);
     if (ret != VK_SUCCESS) {
         av_log(avctx, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
                ff_vk_ret2str(ret));
@@ -592,8 +631,8 @@ int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e)
         return AVERROR_EXTERNAL;
     }
 
-    /* Rotate queues */
-    s->cur_queue_idx = (s->cur_queue_idx + 1) % s->queue_count;
+    for (int i = 0; i < e->sem_sig_cnt; i++)
+        *e->sem_sig_val_dst[i] += 1;
 
     return 0;
 }
@@ -602,8 +641,7 @@ int ff_vk_add_dep_exec_ctx(AVFilterContext *avctx, FFVkExecContext *e,
                            AVBufferRef **deps, int nb_deps)
 {
     AVBufferRef **dst;
-    VulkanFilterContext *s = avctx->priv;
-    FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];
+    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
 
     if (!deps || !nb_deps)
         return 0;
@@ -632,7 +670,7 @@ err:
 static int vulkan_filter_set_device(AVFilterContext *avctx,
                                     AVBufferRef *device)
 {
-    VulkanFilterContext *s = avctx->priv;
+    FFVulkanContext *s = avctx->priv;
 
     av_buffer_unref(&s->device_ref);
 
@@ -649,7 +687,7 @@ static int vulkan_filter_set_device(AVFilterContext *avctx,
 static int vulkan_filter_set_frames(AVFilterContext *avctx,
                                     AVBufferRef *frames)
 {
-    VulkanFilterContext *s = avctx->priv;
+    FFVulkanContext *s = avctx->priv;
 
     av_buffer_unref(&s->frames_ref);
 
@@ -664,7 +702,8 @@ int ff_vk_filter_config_input(AVFilterLink *inlink)
 {
     int err;
     AVFilterContext *avctx = inlink->dst;
-    VulkanFilterContext *s = avctx->priv;
+    FFVulkanContext *s = avctx->priv;
+    FFVulkanFunctions *vk = &s->vkfn;
     AVHWFramesContext *input_frames;
 
     if (!inlink->hw_frames_ctx) {
@@ -695,6 +734,9 @@ int ff_vk_filter_config_input(AVFilterLink *inlink)
     if (err < 0)
         return err;
 
+    vk->GetPhysicalDeviceProperties(s->hwctx->phys_dev, &s->props);
+    vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops);
+
     /* Default output parameters match input parameters. */
     s->input_format = input_frames->sw_format;
     if (s->output_format == AV_PIX_FMT_NONE)
@@ -711,7 +753,7 @@ int ff_vk_filter_config_output_inplace(AVFilterLink *outlink)
 {
     int err;
     AVFilterContext *avctx = outlink->src;
-    VulkanFilterContext *s = avctx->priv;
+    FFVulkanContext *s = avctx->priv;
 
     av_buffer_unref(&outlink->hw_frames_ctx);
 
@@ -741,7 +783,7 @@ int ff_vk_filter_config_output(AVFilterLink *outlink)
 {
     int err;
     AVFilterContext *avctx = outlink->src;
-    VulkanFilterContext *s = avctx->priv;
+    FFVulkanContext *s = avctx->priv;
     AVBufferRef *output_frames_ref;
     AVHWFramesContext *output_frames;
 
@@ -790,7 +832,7 @@ fail:
 
 int ff_vk_filter_init(AVFilterContext *avctx)
 {
-    VulkanFilterContext *s = avctx->priv;
+    FFVulkanContext *s = avctx->priv;
 
     s->output_format = AV_PIX_FMT_NONE;
 
@@ -800,12 +842,12 @@ int ff_vk_filter_init(AVFilterContext *avctx)
     return 0;
 }
 
-FN_CREATING(VulkanFilterContext, VkSampler, sampler, samplers, samplers_num)
-VkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords,
-                              VkFilter filt)
+FN_CREATING(FFVulkanContext, FFVkSampler, sampler, samplers, samplers_num)
+FFVkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords,
+                                VkFilter filt)
 {
     VkResult ret;
-    VulkanFilterContext *s = avctx->priv;
+    FFVulkanContext *s = avctx->priv;
     FFVulkanFunctions *vk = &s->vkfn;
 
     VkSamplerCreateInfo sampler_info = {
@@ -823,19 +865,22 @@ VkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords,
         .unnormalizedCoordinates = unnorm_coords,
     };
 
-    VkSampler *sampler = create_sampler(s);
-    if (!sampler)
+    FFVkSampler *sctx = create_sampler(s);
+    if (!sctx)
         return NULL;
 
     ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info,
-                            s->hwctx->alloc, sampler);
+                            s->hwctx->alloc, &sctx->sampler[0]);
     if (ret != VK_SUCCESS) {
         av_log(avctx, AV_LOG_ERROR, "Unable to init sampler: %s\n",
                ff_vk_ret2str(ret));
         return NULL;
     }
 
-    return sampler;
+    for (int i = 1; i < 4; i++)
+        sctx->sampler[i] = sctx->sampler[0];
+
+    return sctx;
 }
 
 int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)
@@ -863,7 +908,7 @@ typedef struct ImageViewCtx {
 
 static void destroy_imageview(void *opaque, uint8_t *data)
 {
-    VulkanFilterContext *s = opaque;
+    FFVulkanContext *s = opaque;
     FFVulkanFunctions *vk = &s->vkfn;
     ImageViewCtx *iv = (ImageViewCtx *)data;
 
@@ -877,7 +922,7 @@ int ff_vk_create_imageview(AVFilterContext *avctx, FFVkExecContext *e,
 {
     int err;
     AVBufferRef *buf;
-    VulkanFilterContext *s = avctx->priv;
+    FFVulkanContext *s = avctx->priv;
     FFVulkanFunctions *vk = &s->vkfn;
 
     VkImageViewCreateInfo imgview_spawn = {
@@ -924,8 +969,8 @@ int ff_vk_create_imageview(AVFilterContext *avctx, FFVkExecContext *e,
     return 0;
 }
 
-FN_CREATING(VulkanPipeline, FFSPIRVShader, shader, shaders, shaders_num)
-FFSPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, VulkanPipeline *pl,
+FN_CREATING(FFVulkanPipeline, FFSPIRVShader, shader, shaders, shaders_num)
+FFSPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, FFVulkanPipeline *pl,
                                  const char *name, VkShaderStageFlags stage)
 {
     FFSPIRVShader *shd = create_shader(pl);
@@ -984,7 +1029,7 @@ int ff_vk_compile_shader(AVFilterContext *avctx, FFSPIRVShader *shd,
 {
     int err;
     VkResult ret;
-    VulkanFilterContext *s = avctx->priv;
+    FFVulkanContext *s = avctx->priv;
     FFVulkanFunctions *vk = &s->vkfn;
     VkShaderModuleCreateInfo shader_create;
     uint8_t *spirv;
@@ -1043,25 +1088,24 @@ static const struct descriptor_props {
     [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER]   = { sizeof(VkBufferView),           "imageBuffer",   1, 0, 0, 0, },
 };
 
-int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
-                             FFSPIRVShader *shd, VulkanDescriptorSetBinding *desc,
+int ff_vk_add_descriptor_set(AVFilterContext *avctx, FFVulkanPipeline *pl,
+                             FFSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc,
                              int num, int only_print_to_shader)
 {
     VkResult ret;
     VkDescriptorSetLayout *layout;
-    VulkanFilterContext *s = avctx->priv;
+    FFVulkanContext *s = avctx->priv;
     FFVulkanFunctions *vk = &s->vkfn;
 
     if (only_print_to_shader)
         goto print;
 
     pl->desc_layout = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout),
-                                       pl->desc_layout_num + 1);
+                                       pl->desc_layout_num + pl->qf->nb_queues);
     if (!pl->desc_layout)
         return AVERROR(ENOMEM);
 
     layout = &pl->desc_layout[pl->desc_layout_num];
-    memset(layout, 0, sizeof(*layout));
 
     { /* Create descriptor set layout descriptions */
         VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 };
@@ -1076,21 +1120,27 @@ int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
             desc_binding[i].descriptorType     = desc[i].type;
             desc_binding[i].descriptorCount    = FFMAX(desc[i].elems, 1);
             desc_binding[i].stageFlags         = desc[i].stages;
-            desc_binding[i].pImmutableSamplers = desc[i].samplers;
+            desc_binding[i].pImmutableSamplers = desc[i].sampler ?
+                                                 desc[i].sampler->sampler :
+                                                 NULL;
         }
 
         desc_create_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
         desc_create_layout.pBindings = desc_binding;
         desc_create_layout.bindingCount = num;
 
-        ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
-                                            s->hwctx->alloc, layout);
-        av_free(desc_binding);
-        if (ret != VK_SUCCESS) {
-            av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set "
-                   "layout: %s\n", ff_vk_ret2str(ret));
-            return AVERROR_EXTERNAL;
+        for (int i = 0; i < pl->qf->nb_queues; i++) {
+            ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
+                                                s->hwctx->alloc, &layout[i]);
+            if (ret != VK_SUCCESS) {
+                av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set "
+                       "layout: %s\n", ff_vk_ret2str(ret));
+                av_free(desc_binding);
+                return AVERROR_EXTERNAL;
+            }
         }
+
+        av_free(desc_binding);
     }
 
     { /* Pool each descriptor by type and update pool counts */
@@ -1108,7 +1158,7 @@ int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
                 memset(&pl->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize));
             }
             pl->pool_size_desc[j].type             = desc[i].type;
-            pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1);
+            pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1)*pl->qf->nb_queues;
         }
     }
 
@@ -1132,27 +1182,32 @@ int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
 
         pl->desc_template_info = av_realloc_array(pl->desc_template_info,
                                                   sizeof(*pl->desc_template_info),
-                                                  pl->desc_layout_num + 1);
+                                                  pl->total_descriptor_sets + pl->qf->nb_queues);
         if (!pl->desc_template_info)
             return AVERROR(ENOMEM);
 
-        dt = &pl->desc_template_info[pl->desc_layout_num];
-        memset(dt, 0, sizeof(*dt));
+        dt = &pl->desc_template_info[pl->total_descriptor_sets];
+        memset(dt, 0, sizeof(*dt)*pl->qf->nb_queues);
 
-        dt->sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO;
-        dt->templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET;
-        dt->descriptorSetLayout = *layout;
-        dt->pDescriptorUpdateEntries = des_entries;
-        dt->descriptorUpdateEntryCount = num;
+        for (int i = 0; i < pl->qf->nb_queues; i++) {
+            dt[i].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO;
+            dt[i].templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET;
+            dt[i].descriptorSetLayout = layout[i];
+            dt[i].pDescriptorUpdateEntries = des_entries;
+            dt[i].descriptorUpdateEntryCount = num;
+        }
     }
 
-    pl->desc_layout_num++;
+    pl->descriptor_sets_num++;
+
+    pl->desc_layout_num += pl->qf->nb_queues;
+    pl->total_descriptor_sets += pl->qf->nb_queues;
 
 print:
     /* Write shader info */
     for (int i = 0; i < num; i++) {
         const struct descriptor_props *prop = &descriptor_props[desc[i].type];
-        GLSLA("layout (set = %i, binding = %i", pl->desc_layout_num - 1, i);
+        GLSLA("layout (set = %i, binding = %i", pl->descriptor_sets_num - 1, i);
 
         if (desc[i].mem_layout)
             GLSLA(", %s", desc[i].mem_layout);
@@ -1184,12 +1239,14 @@ print:
     return 0;
 }
 
-void ff_vk_update_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
+void ff_vk_update_descriptor_set(AVFilterContext *avctx, FFVulkanPipeline *pl,
                                  int set_id)
 {
-    VulkanFilterContext *s = avctx->priv;
+    FFVulkanContext *s = avctx->priv;
     FFVulkanFunctions *vk = &s->vkfn;
 
+    set_id = set_id*pl->qf->nb_queues + pl->qf->cur_queue;
+
     vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
                                         pl->desc_set[set_id],
                                         pl->desc_template[set_id],
@@ -1200,27 +1257,29 @@ void ff_vk_update_push_exec(AVFilterContext *avctx, FFVkExecContext *e,
                             VkShaderStageFlagBits stage, int offset,
                             size_t size, void *src)
 {
-    VulkanFilterContext *s = avctx->priv;
+    FFVulkanContext *s = avctx->priv;
     FFVulkanFunctions *vk = &s->vkfn;
 
-    vk->CmdPushConstants(e->bufs[s->cur_queue_idx], e->bound_pl->pipeline_layout,
+    vk->CmdPushConstants(e->bufs[e->qf->cur_queue], e->bound_pl->pipeline_layout,
                          stage, offset, size, src);
 }
 
-int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl)
+int ff_vk_init_pipeline_layout(AVFilterContext *avctx, FFVulkanPipeline *pl)
 {
     VkResult ret;
-    VulkanFilterContext *s = avctx->priv;
+    FFVulkanContext *s = avctx->priv;
     FFVulkanFunctions *vk = &s->vkfn;
 
-    pl->descriptor_sets_num = pl->desc_layout_num * s->queue_count;
+    pl->desc_staging = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_staging));
+    if (!pl->desc_staging)
+        return AVERROR(ENOMEM);
 
     { /* Init descriptor set pool */
         VkDescriptorPoolCreateInfo pool_create_info = {
             .sType         = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
             .poolSizeCount = pl->pool_size_desc_num,
             .pPoolSizes    = pl->pool_size_desc,
-            .maxSets       = pl->descriptor_sets_num,
+            .maxSets       = pl->total_descriptor_sets,
         };
 
         ret = vk->CreateDescriptorPool(s->hwctx->act_dev, &pool_create_info,
@@ -1237,11 +1296,11 @@ int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl)
         VkDescriptorSetAllocateInfo alloc_info = {
             .sType              = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
             .descriptorPool     = pl->desc_pool,
-            .descriptorSetCount = pl->descriptor_sets_num,
+            .descriptorSetCount = pl->total_descriptor_sets,
             .pSetLayouts        = pl->desc_layout,
         };
 
-        pl->desc_set = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_set));
+        pl->desc_set = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_set));
         if (!pl->desc_set)
             return AVERROR(ENOMEM);
 
@@ -1257,12 +1316,14 @@ int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl)
     { /* Finally create the pipeline layout */
         VkPipelineLayoutCreateInfo spawn_pipeline_layout = {
             .sType                  = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
-            .setLayoutCount         = pl->desc_layout_num,
-            .pSetLayouts            = pl->desc_layout,
+            .pSetLayouts            = (VkDescriptorSetLayout *)pl->desc_staging,
             .pushConstantRangeCount = pl->push_consts_num,
             .pPushConstantRanges    = pl->push_consts,
         };
 
+        for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues)
+            pl->desc_staging[spawn_pipeline_layout.setLayoutCount++] = pl->desc_layout[i];
+
         ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout,
                                        s->hwctx->alloc, &pl->pipeline_layout);
         av_freep(&pl->push_consts);
@@ -1275,21 +1336,19 @@ int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl)
     }
 
     { /* Descriptor template (for tightly packed descriptors) */
-        VkDescriptorUpdateTemplateCreateInfo *desc_template_info;
+        VkDescriptorUpdateTemplateCreateInfo *dt;
 
-        pl->desc_template = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_template));
+        pl->desc_template = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_template));
         if (!pl->desc_template)
             return AVERROR(ENOMEM);
 
         /* Create update templates for the descriptor sets */
-        for (int i = 0; i < pl->descriptor_sets_num; i++) {
-            desc_template_info = &pl->desc_template_info[i % pl->desc_layout_num];
-            desc_template_info->pipelineLayout = pl->pipeline_layout;
+        for (int i = 0; i < pl->total_descriptor_sets; i++) {
+            dt = &pl->desc_template_info[i];
+            dt->pipelineLayout = pl->pipeline_layout;
             ret = vk->CreateDescriptorUpdateTemplate(s->hwctx->act_dev,
-                                                     desc_template_info,
-                                                     s->hwctx->alloc,
+                                                     dt, s->hwctx->alloc,
                                                      &pl->desc_template[i]);
-            av_free((void *)desc_template_info->pDescriptorUpdateEntries);
             if (ret != VK_SUCCESS) {
                 av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor "
                        "template: %s\n", ff_vk_ret2str(ret));
@@ -1297,23 +1356,34 @@ int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl)
             }
         }
 
+        /* Free the duplicated memory used for the template entries */
+        for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) {
+            dt = &pl->desc_template_info[i];
+            av_free((void *)dt->pDescriptorUpdateEntries);
+        }
+
         av_freep(&pl->desc_template_info);
     }
 
     return 0;
 }
 
-FN_CREATING(VulkanFilterContext, VulkanPipeline, pipeline, pipelines, pipelines_num)
-VulkanPipeline *ff_vk_create_pipeline(AVFilterContext *avctx)
+FN_CREATING(FFVulkanContext, FFVulkanPipeline, pipeline, pipelines, pipelines_num)
+FFVulkanPipeline *ff_vk_create_pipeline(AVFilterContext *avctx,
+                                      FFVkQueueFamilyCtx *qf)
 {
-    return create_pipeline(avctx->priv);
+    FFVulkanPipeline *pl = create_pipeline(avctx->priv);
+    if (pl)
+        pl->qf = qf;
+
+    return pl;
 }
 
-int ff_vk_init_compute_pipeline(AVFilterContext *avctx, VulkanPipeline *pl)
+int ff_vk_init_compute_pipeline(AVFilterContext *avctx, FFVulkanPipeline *pl)
 {
     int i;
     VkResult ret;
-    VulkanFilterContext *s = avctx->priv;
+    FFVulkanContext *s = avctx->priv;
     FFVulkanFunctions *vk = &s->vkfn;
 
     VkComputePipelineCreateInfo pipe = {
@@ -1346,26 +1416,31 @@ int ff_vk_init_compute_pipeline(AVFilterContext *avctx, VulkanPipeline *pl)
 }
 
 void ff_vk_bind_pipeline_exec(AVFilterContext *avctx, FFVkExecContext *e,
-                              VulkanPipeline *pl)
+                              FFVulkanPipeline *pl)
 {
-    VulkanFilterContext *s = avctx->priv;
+    FFVulkanContext *s = avctx->priv;
     FFVulkanFunctions *vk = &s->vkfn;
 
-    vk->CmdBindPipeline(e->bufs[s->cur_queue_idx], pl->bind_point, pl->pipeline);
+    vk->CmdBindPipeline(e->bufs[e->qf->cur_queue], pl->bind_point, pl->pipeline);
 
-    vk->CmdBindDescriptorSets(e->bufs[s->cur_queue_idx], pl->bind_point,
-                              pl->pipeline_layout, 0, pl->descriptor_sets_num,
-                              pl->desc_set, 0, 0);
+    for (int i = 0; i < pl->descriptor_sets_num; i++)
+        pl->desc_staging[i] = pl->desc_set[i*pl->qf->nb_queues + pl->qf->cur_queue];
+
+    vk->CmdBindDescriptorSets(e->bufs[e->qf->cur_queue], pl->bind_point,
+                              pl->pipeline_layout, 0,
+                              pl->descriptor_sets_num,
+                              (VkDescriptorSet *)pl->desc_staging,
+                              0, NULL);
 
     e->bound_pl = pl;
 }
 
-static void free_exec_ctx(VulkanFilterContext *s, FFVkExecContext *e)
+static void free_exec_ctx(FFVulkanContext *s, FFVkExecContext *e)
 {
     FFVulkanFunctions *vk = &s->vkfn;
 
     /* Make sure all queues have finished executing */
-    for (int i = 0; i < s->queue_count; i++) {
+    for (int i = 0; i < e->qf->nb_queues; i++) {
         FFVkQueueCtx *q = &e->queues[i];
 
         if (q->fence) {
@@ -1389,7 +1464,7 @@ static void free_exec_ctx(VulkanFilterContext *s, FFVkExecContext *e)
     }
 
     if (e->bufs)
-        vk->FreeCommandBuffers(s->hwctx->act_dev, e->pool, s->queue_count, e->bufs);
+        vk->FreeCommandBuffers(s->hwctx->act_dev, e->pool, e->qf->nb_queues, e->bufs);
     if (e->pool)
         vk->DestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc);
 
@@ -1397,13 +1472,14 @@ static void free_exec_ctx(VulkanFilterContext *s, FFVkExecContext *e)
     av_freep(&e->queues);
     av_freep(&e->sem_sig);
     av_freep(&e->sem_sig_val);
+    av_freep(&e->sem_sig_val_dst);
     av_freep(&e->sem_wait);
     av_freep(&e->sem_wait_dst);
     av_freep(&e->sem_wait_val);
     av_free(e);
 }
 
-static void free_pipeline(VulkanFilterContext *s, VulkanPipeline *pl)
+static void free_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
 {
     FFVulkanFunctions *vk = &s->vkfn;
 
@@ -1433,6 +1509,7 @@ static void free_pipeline(VulkanFilterContext *s, VulkanPipeline *pl)
         vk->DestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool,
                                   s->hwctx->alloc);
 
+    av_freep(&pl->desc_staging);
     av_freep(&pl->desc_set);
     av_freep(&pl->shaders);
     av_freep(&pl->desc_layout);
@@ -1443,8 +1520,10 @@ static void free_pipeline(VulkanFilterContext *s, VulkanPipeline *pl)
     /* Only freed in case of failure */
     av_freep(&pl->pool_size_desc);
     if (pl->desc_template_info) {
-        for (int i = 0; i < pl->descriptor_sets_num; i++)
-            av_free((void *)pl->desc_template_info[i].pDescriptorUpdateEntries);
+        for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) {
+            VkDescriptorUpdateTemplateCreateInfo *dt = &pl->desc_template_info[i];
+            av_free((void *)dt->pDescriptorUpdateEntries);
+        }
         av_freep(&pl->desc_template_info);
     }
 
@@ -1453,7 +1532,7 @@ static void free_pipeline(VulkanFilterContext *s, VulkanPipeline *pl)
 
 void ff_vk_filter_uninit(AVFilterContext *avctx)
 {
-    VulkanFilterContext *s = avctx->priv;
+    FFVulkanContext *s = avctx->priv;
     FFVulkanFunctions *vk = &s->vkfn;
 
     ff_vk_glslang_uninit();
@@ -1463,7 +1542,8 @@ void ff_vk_filter_uninit(AVFilterContext *avctx)
     av_freep(&s->exec_ctx);
 
     for (int i = 0; i < s->samplers_num; i++) {
-        vk->DestroySampler(s->hwctx->act_dev, *s->samplers[i], s->hwctx->alloc);
+        vk->DestroySampler(s->hwctx->act_dev, s->samplers[i]->sampler[0],
+                           s->hwctx->alloc);
         av_free(s->samplers[i]);
     }
     av_freep(&s->samplers);
diff --git a/libavfilter/vulkan.h b/libavfilter/vulkan.h
index 89b76ba355..9d17d2b14f 100644
--- a/libavfilter/vulkan.h
+++ b/libavfilter/vulkan.h
@@ -20,6 +20,7 @@
 #define AVFILTER_VULKAN_H
 
 #define VK_NO_PROTOTYPES
+#define VK_ENABLE_BETA_EXTENSIONS
 
 #include "avfilter.h"
 #include "libavutil/pixdesc.h"
@@ -52,9 +53,6 @@
             goto fail;                                                         \
     } while (0)
 
-/* Useful for attaching immutable samplers to arrays */
-#define DUP_SAMPLER_ARRAY4(x) (VkSampler []){ x, x, x, x, }
-
 typedef struct FFSPIRVShader {
     const char *name;                       /* Name for id/debugging purposes */
     AVBPrint src;
@@ -62,7 +60,11 @@ typedef struct FFSPIRVShader {
     VkPipelineShaderStageCreateInfo shader;
 } FFSPIRVShader;
 
-typedef struct VulkanDescriptorSetBinding {
+typedef struct FFVkSampler {
+    VkSampler sampler[4];
+} FFVkSampler;
+
+typedef struct FFVulkanDescriptorSetBinding {
     const char         *name;
     VkDescriptorType    type;
     const char         *mem_layout;  /* Storage images (rgba8, etc.) and buffers (std430, etc.) */
@@ -71,9 +73,9 @@ typedef struct VulkanDescriptorSetBinding {
     uint32_t            dimensions;  /* Needed for e.g. sampler%iD */
     uint32_t            elems;       /* 0 - scalar, 1 or more - vector */
     VkShaderStageFlags  stages;
-    const VkSampler    *samplers;    /* Immutable samplers, length - #elems */
+    FFVkSampler        *sampler;     /* Sampler to use for all elems */
     void               *updater;     /* Pointer to VkDescriptor*Info */
-} VulkanDescriptorSetBinding;
+} FFVulkanDescriptorSetBinding;
 
 typedef struct FFVkBuffer {
     VkBuffer buf;
@@ -81,7 +83,15 @@ typedef struct FFVkBuffer {
     VkMemoryPropertyFlagBits flags;
 } FFVkBuffer;
 
-typedef struct VulkanPipeline {
+typedef struct FFVkQueueFamilyCtx {
+    int queue_family;
+    int nb_queues;
+    int cur_queue;
+} FFVkQueueFamilyCtx;
+
+typedef struct FFVulkanPipeline {
+    FFVkQueueFamilyCtx *qf;
+
     VkPipelineBindPoint bind_point;
 
     /* Contexts */
@@ -97,18 +107,21 @@ typedef struct VulkanPipeline {
     int push_consts_num;
 
     /* Descriptors */
-    VkDescriptorSetLayout      *desc_layout;
-    VkDescriptorPool            desc_pool;
-    VkDescriptorSet            *desc_set;
-    VkDescriptorUpdateTemplate *desc_template;
-    int                         desc_layout_num;
-    int                         descriptor_sets_num;
-    int                         pool_size_desc_num;
+    VkDescriptorSetLayout         *desc_layout;
+    VkDescriptorPool               desc_pool;
+    VkDescriptorSet               *desc_set;
+    void                         **desc_staging;
+    VkDescriptorSetLayoutBinding **desc_binding;
+    VkDescriptorUpdateTemplate    *desc_template;
+    int                            desc_layout_num;
+    int                            descriptor_sets_num;
+    int                            total_descriptor_sets;
+    int                            pool_size_desc_num;
 
     /* Temporary, used to store data in between initialization stages */
     VkDescriptorUpdateTemplateCreateInfo *desc_template_info;
     VkDescriptorPoolSize *pool_size_desc;
-} VulkanPipeline;
+} FFVulkanPipeline;
 
 typedef struct FFVkQueueCtx {
     VkFence fence;
@@ -126,6 +139,8 @@ typedef struct FFVkQueueCtx {
 } FFVkQueueCtx;
 
 typedef struct FFVkExecContext {
+    FFVkQueueFamilyCtx *qf;
+
     VkCommandPool pool;
     VkCommandBuffer *bufs;
     FFVkQueueCtx *queues;
@@ -134,7 +149,7 @@ typedef struct FFVkExecContext {
     int *nb_deps;
     int *dep_alloc_size;
 
-    VulkanPipeline *bound_pl;
+    FFVulkanPipeline *bound_pl;
 
     VkSemaphore *sem_wait;
     int sem_wait_alloc; /* Allocated sem_wait */
@@ -152,23 +167,23 @@ typedef struct FFVkExecContext {
 
     uint64_t *sem_sig_val;
     int sem_sig_val_alloc;
+
+    uint64_t **sem_sig_val_dst;
+    int sem_sig_val_dst_alloc;
 } FFVkExecContext;
 
-typedef struct VulkanFilterContext {
+typedef struct FFVulkanContext {
     const AVClass         *class;
     FFVulkanFunctions     vkfn;
     FFVulkanExtensions    extensions;
+    VkPhysicalDeviceProperties props;
+    VkPhysicalDeviceMemoryProperties mprops;
 
     AVBufferRef           *device_ref;
     AVBufferRef           *frames_ref; /* For in-place filtering */
     AVHWDeviceContext     *device;
     AVVulkanDeviceContext *hwctx;
 
-    /* State - mirrored with the exec ctx */
-    int cur_queue_idx;
-    int queue_family_idx;
-    int queue_count;
-
     /* Properties */
     int                 output_width;
     int                output_height;
@@ -176,7 +191,7 @@ typedef struct VulkanFilterContext {
     enum AVPixelFormat  input_format;
 
     /* Samplers */
-    VkSampler **samplers;
+    FFVkSampler **samplers;
     int samplers_num;
 
     /* Exec contexts */
@@ -184,12 +199,12 @@ typedef struct VulkanFilterContext {
     int exec_ctx_num;
 
     /* Pipelines (each can have 1 shader of each type) */
-    VulkanPipeline **pipelines;
+    FFVulkanPipeline **pipelines;
     int pipelines_num;
 
     void *scratch; /* Scratch memory used only in functions */
     unsigned int scratch_size;
-} VulkanFilterContext;
+} FFVulkanContext;
 
 /* Identity mapping - r = r, b = b, g = g, a = a */
 extern const VkComponentMapping ff_comp_identity_map;
@@ -218,11 +233,23 @@ int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt);
  */
 const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt);
 
+/**
+ * Initialize a queue family.
+ * A queue limit of 0 means no limit.
+ */
+void ff_vk_qf_init(AVFilterContext *avctx, FFVkQueueFamilyCtx *qf,
+                   VkQueueFlagBits dev_family, int queue_limit);
+
+/**
+ * Rotate through the queues in a queue family.
+ */
+void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf);
+
 /**
  * Create a Vulkan sampler, will be auto-freed in ff_vk_filter_uninit()
  */
-VkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords,
-                              VkFilter filt);
+FFVkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords,
+                                VkFilter filt);
 
 /**
  * Create an imageview.
@@ -237,19 +264,20 @@ int ff_vk_create_imageview(AVFilterContext *avctx, FFVkExecContext *e,
  * Define a push constant for a given stage into a pipeline.
  * Must be called before the pipeline layout has been initialized.
  */
-int ff_vk_add_push_constant(AVFilterContext *avctx, VulkanPipeline *pl,
+int ff_vk_add_push_constant(AVFilterContext *avctx, FFVulkanPipeline *pl,
                             int offset, int size, VkShaderStageFlagBits stage);
 
 /**
  * Inits a pipeline. Everything in it will be auto-freed when calling
  * ff_vk_filter_uninit().
  */
-VulkanPipeline *ff_vk_create_pipeline(AVFilterContext *avctx);
+FFVulkanPipeline *ff_vk_create_pipeline(AVFilterContext *avctx,
+                                      FFVkQueueFamilyCtx *qf);
 
 /**
  * Inits a shader for a specific pipeline. Will be auto-freed on uninit.
  */
-FFSPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, VulkanPipeline *pl,
+FFSPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, FFVulkanPipeline *pl,
                                  const char *name, VkShaderStageFlags stage);
 
 /**
@@ -261,8 +289,8 @@ void ff_vk_set_compute_shader_sizes(AVFilterContext *avctx, FFSPIRVShader *shd,
 /**
  * Adds a descriptor set to the shader and registers them in the pipeline.
  */
-int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
-                             FFSPIRVShader *shd, VulkanDescriptorSetBinding *desc,
+int ff_vk_add_descriptor_set(AVFilterContext *avctx, FFVulkanPipeline *pl,
+                             FFSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc,
                              int num, int only_print_to_shader);
 
 /**
@@ -280,27 +308,28 @@ void ff_vk_print_shader(AVFilterContext *avctx, FFSPIRVShader *shd, int prio);
  * Initializes the pipeline layout after all shaders and descriptor sets have
  * been finished.
  */
-int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl);
+int ff_vk_init_pipeline_layout(AVFilterContext *avctx, FFVulkanPipeline *pl);
 
 /**
  * Initializes a compute pipeline. Will pick the first shader with the
  * COMPUTE flag set.
  */
-int ff_vk_init_compute_pipeline(AVFilterContext *avctx, VulkanPipeline *pl);
+int ff_vk_init_compute_pipeline(AVFilterContext *avctx, FFVulkanPipeline *pl);
 
 /**
  * Updates a descriptor set via the updaters defined.
  * Can be called immediately after pipeline creation, but must be called
  * at least once before queue submission.
  */
-void ff_vk_update_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
+void ff_vk_update_descriptor_set(AVFilterContext *avctx, FFVulkanPipeline *pl,
                                  int set_id);
 
 /**
  * Init an execution context for command recording and queue submission.
  * WIll be auto-freed on uninit.
  */
-int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx);
+int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx,
+                          FFVkQueueFamilyCtx *qf);
 
 /**
  * Begin recording to the command buffer. Previous execution must have been
@@ -313,7 +342,7 @@ int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e);
  * Must be called after ff_vk_start_exec_recording() and before submission.
  */
 void ff_vk_bind_pipeline_exec(AVFilterContext *avctx, FFVkExecContext *e,
-                              VulkanPipeline *pl);
+                              FFVulkanPipeline *pl);
 
 /**
  * Updates push constants.



More information about the ffmpeg-cvslog mailing list