FFmpeg
vf_nlmeans_vulkan.c
1 /*
2  * Copyright (c) Lynne
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/random_seed.h"
22 #include "libavutil/opt.h"
23 #include "vulkan_filter.h"
24 #include "vulkan_spirv.h"
25 #include "internal.h"
26 #include "video.h"
27 
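/* The shaders process TYPE_ELEMS (4) search-window offsets per invocation,
 * packing the per-offset values into a vec4 (TYPE_NAME); TYPE_SIZE is the
 * size of that packed type in bytes. */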
28 #define TYPE_NAME "vec4"
29 #define TYPE_ELEMS 4
30 #define TYPE_SIZE (TYPE_ELEMS*4)
31 
32 typedef struct NLMeansVulkanContext {
33  FFVulkanContext vkctx;
34 
35  int initialized;
36  FFVkExecPool e;
37  FFVkQueueFamilyCtx qf;
38  VkSampler sampler;
39 
40  AVBufferPool *integral_buf_pool;
41  AVBufferPool *ws_buf_pool;
42 
43  FFVkBuffer xyoffsets_buf;
44 
45  int pl_weights_rows;
46  FFVulkanPipeline pl_weights;
47  FFVkSPIRVShader shd_weights;
48 
49  FFVulkanPipeline pl_denoise;
50  FFVkSPIRVShader shd_denoise;
51 
52  int *xoffsets;
53  int *yoffsets;
54  int nb_offsets;
55  float strength[4];
56  int patch[4];
57 
58  struct nlmeans_opts {
59  int r;
60  double s;
61  double sc[4];
62  int p;
63  int pc[4];
64  int t;
65  } opts;
66 } NLMeansVulkanContext;
67 
68 extern const char *ff_source_prefix_sum_comp;
69 
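/* Emits GLSL that samples the current pixel (s1) and the four offset-shifted
 * pixels (s2) for one plane/component, leaving the squared differences in s2
 * as the values to be integrated by the prefix-sum passes below. */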
70 static void insert_first(FFVkSPIRVShader *shd, int r, const char *off, int horiz, int plane, int comp)
71 {
72  GLSLF(4, s1 = texture(input_img[%i], pos + ivec2(%i + %s, %i + %s))[%i];
73  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
74 
75  GLSLF(4, s2[0] = texture(input_img[%i], pos + offs[0] + ivec2(%i + %s, %i + %s))[%i];
76  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
77  GLSLF(4, s2[1] = texture(input_img[%i], pos + offs[1] + ivec2(%i + %s, %i + %s))[%i];
78  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
79  GLSLF(4, s2[2] = texture(input_img[%i], pos + offs[2] + ivec2(%i + %s, %i + %s))[%i];
80  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
81  GLSLF(4, s2[3] = texture(input_img[%i], pos + offs[3] + ivec2(%i + %s, %i + %s))[%i];
82  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
83 
84  GLSLC(4, s2 = (s1 - s2) * (s1 - s2); );
85 }
86 
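/* Emits the row-wise prefix sum over the integral buffer; on the first pass
 * the squared differences are generated in place via insert_first(), on the
 * second pass they are read back from the buffer written by the other pass. */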
87 static void insert_horizontal_pass(FFVkSPIRVShader *shd, int nb_rows, int first, int plane, int comp)
88 {
89  GLSLF(1, pos.y = int(gl_GlobalInvocationID.x) * %i; ,nb_rows);
90  if (!first)
91  GLSLC(1, barrier(); );
92  GLSLC(0, );
93  GLSLF(1, if (pos.y < height[%i]) { ,plane);
94  GLSLC(2, #pragma unroll(1) );
95  GLSLF(2, for (r = 0; r < %i; r++) { ,nb_rows);
96  GLSLC(3, prefix_sum = DTYPE(0); );
97  GLSLC(3, offset = int_stride * uint64_t(pos.y + r); );
98  GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset); );
99  GLSLC(0, );
100  GLSLF(3, for (pos.x = 0; pos.x < width[%i]; pos.x++) { ,plane);
101  if (first)
102  insert_first(shd, 0, "r", 0, plane, comp);
103  else
104  GLSLC(4, s2 = dst.v[pos.x]; );
105  GLSLC(4, dst.v[pos.x] = s2 + prefix_sum; );
106  GLSLC(4, prefix_sum += s2; );
107  GLSLC(3, } );
108  GLSLC(2, } );
109  GLSLC(1, } );
110  GLSLC(0, );
111 }
112 
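/* Emits the column-wise prefix sum; combined with the horizontal pass this
 * yields a summed-area table of squared differences for the current batch of
 * four offsets. */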
113 static void insert_vertical_pass(FFVkSPIRVShader *shd, int nb_rows, int first, int plane, int comp)
114 {
115  GLSLF(1, pos.x = int(gl_GlobalInvocationID.x) * %i; ,nb_rows);
116  GLSLC(1, #pragma unroll(1) );
117  GLSLF(1, for (r = 0; r < %i; r++) ,nb_rows);
118  GLSLC(2, psum[r] = DTYPE(0); );
119  GLSLC(0, );
120  if (!first)
121  GLSLC(1, barrier(); );
122  GLSLC(0, );
123  GLSLF(1, if (pos.x < width[%i]) { ,plane);
124  GLSLF(2, for (pos.y = 0; pos.y < height[%i]; pos.y++) { ,plane);
125  GLSLC(3, offset = int_stride * uint64_t(pos.y); );
126  GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset); );
127  GLSLC(0, );
128  GLSLC(3, #pragma unroll(1) );
129  GLSLF(3, for (r = 0; r < %i; r++) { ,nb_rows);
130  if (first)
131  insert_first(shd, 0, "r", 1, plane, comp);
132  else
133  GLSLC(4, s2 = dst.v[pos.x + r]; );
134  GLSLC(4, dst.v[pos.x + r] = s2 + psum[r]; );
135  GLSLC(4, psum[r] += s2; );
136  GLSLC(3, } );
137  GLSLC(2, } );
138  GLSLC(1, } );
139  GLSLC(0, );
140 }
141 
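/* Emits the weights accumulation: the patch-wise sum of squared differences
 * is fetched from the summed-area table with a four-corner lookup
 * (d + a - b - c), turned into weights via exp(), and added to the
 * per-component weight/sum buffers (atomically when t > 1). */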
142 static void insert_weights_pass(FFVkSPIRVShader *shd, int nb_rows, int vert,
143  int t, int dst_comp, int plane, int comp)
144 {
145  GLSLF(1, p = patch_size[%i]; ,dst_comp);
146  GLSLC(0, );
147  GLSLC(1, barrier(); );
148  GLSLC(0, );
149  if (!vert) {
150  GLSLF(1, for (pos.y = 0; pos.y < height[%i]; pos.y++) { ,plane);
151  GLSLF(2, if (gl_GlobalInvocationID.x*%i >= width[%i]) ,nb_rows, plane);
152  GLSLC(3, break; );
153  GLSLF(2, for (r = 0; r < %i; r++) { ,nb_rows);
154  GLSLF(3, pos.x = int(gl_GlobalInvocationID.x) * %i + r; ,nb_rows);
155  } else {
156  GLSLF(1, for (pos.x = 0; pos.x < width[%i]; pos.x++) { ,plane);
157  GLSLF(2, if (gl_GlobalInvocationID.x*%i >= height[%i]) ,nb_rows, plane);
158  GLSLC(3, break; );
159  GLSLF(2, for (r = 0; r < %i; r++) { ,nb_rows);
160  GLSLF(3, pos.y = int(gl_GlobalInvocationID.x) * %i + r; ,nb_rows);
161  }
162  GLSLC(0, );
163  GLSLC(3, a = DTYPE(0); );
164  GLSLC(3, b = DTYPE(0); );
165  GLSLC(3, c = DTYPE(0); );
166  GLSLC(3, d = DTYPE(0); );
167  GLSLC(0, );
168  GLSLC(3, lt = ((pos.x - p) < 0) || ((pos.y - p) < 0); );
169  GLSLC(0, );
170  GLSLF(3, src[0] = texture(input_img[%i], pos + offs[0])[%i]; ,plane, comp);
171  GLSLF(3, src[1] = texture(input_img[%i], pos + offs[1])[%i]; ,plane, comp);
172  GLSLF(3, src[2] = texture(input_img[%i], pos + offs[2])[%i]; ,plane, comp);
173  GLSLF(3, src[3] = texture(input_img[%i], pos + offs[3])[%i]; ,plane, comp);
174  GLSLC(0, );
175  GLSLC(3, if (lt == false) { );
176  GLSLC(3, offset = int_stride * uint64_t(pos.y - p); );
177  GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset); );
178  GLSLC(4, a = dst.v[pos.x - p]; );
179  GLSLC(4, c = dst.v[pos.x + p]; );
180  GLSLC(3, offset = int_stride * uint64_t(pos.y + p); );
181  GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset); );
182  GLSLC(4, b = dst.v[pos.x - p]; );
183  GLSLC(4, d = dst.v[pos.x + p]; );
184  GLSLC(3, } );
185  GLSLC(0, );
186  GLSLC(3, patch_diff = d + a - b - c; );
187  GLSLF(3, w = exp(patch_diff * strength[%i]); ,dst_comp);
188  GLSLC(3, w_sum = w[0] + w[1] + w[2] + w[3]; );
189  GLSLC(3, sum = dot(w, src*255); );
190  GLSLC(0, );
191  if (t > 1) {
192  GLSLF(3, atomicAdd(weights_%i[pos.y*ws_stride[%i] + pos.x], w_sum); ,dst_comp, dst_comp);
193  GLSLF(3, atomicAdd(sums_%i[pos.y*ws_stride[%i] + pos.x], sum); ,dst_comp, dst_comp);
194  } else {
195  GLSLF(3, weights_%i[pos.y*ws_stride[%i] + pos.x] += w_sum; ,dst_comp, dst_comp);
196  GLSLF(3, sums_%i[pos.y*ws_stride[%i] + pos.x] += sum; ,dst_comp, dst_comp);
197  }
198  GLSLC(2, } );
199  GLSLC(1, } );
200 }
201 
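/* Push constants for the weights shader: per-plane dimensions, the stride of
 * the weights/sums buffers, per-component patch radius and strength, the base
 * address/size/stride of the integral-image scratch buffer, and the index of
 * the first offset handled by this dispatch. */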
202 typedef struct HorizontalPushData {
203  uint32_t width[4];
204  uint32_t height[4];
205  uint32_t ws_stride[4];
206  int32_t patch_size[4];
207  float strength[4];
208  VkDeviceAddress integral_base;
209  uint64_t integral_size;
210  uint64_t int_stride;
211  uint32_t xyoffs_start;
212 } HorizontalPushData;
213 
214 static av_cold int init_weights_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec,
215  FFVulkanPipeline *pl, FFVkSPIRVShader *shd,
216  VkSampler sampler, FFVkSPIRVCompiler *spv,
217  int width, int height, int t,
218  const AVPixFmtDescriptor *desc,
219  int planes, int *nb_rows)
220 {
221  int err;
222  uint8_t *spv_data;
223  size_t spv_len;
224  void *spv_opaque = NULL;
225  FFVulkanDescriptorSetBinding *desc_set;
226  int max_dim = FFMAX(width, height);
227  uint32_t max_wg = vkctx->props.properties.limits.maxComputeWorkGroupSize[0];
228  int wg_size, wg_rows;
229 
230  /* Size the workgroup to cover the largest plane dimension, processing multiple rows per invocation if the device limit is smaller */
231  wg_size = max_wg;
232  wg_rows = 1;
233 
234  if (max_wg > max_dim) {
235  wg_size = max_dim;
236  } else if (max_wg < max_dim) {
237  /* Make it fit */
238  while (wg_size*wg_rows < max_dim)
239  wg_rows++;
240  }
241 
242  RET(ff_vk_shader_init(pl, shd, "nlmeans_weights", VK_SHADER_STAGE_COMPUTE_BIT, 0));
243  ff_vk_shader_set_compute_sizes(shd, wg_size, 1, 1);
244  *nb_rows = wg_rows;
245 
246  if (t > 1)
247  GLSLC(0, #extension GL_EXT_shader_atomic_float : require );
248  GLSLC(0, #extension GL_ARB_gpu_shader_int64 : require );
249  GLSLC(0, );
250  GLSLF(0, #define DTYPE %s ,TYPE_NAME);
251  GLSLF(0, #define T_ALIGN %i ,TYPE_SIZE);
252  GLSLC(0, );
253  GLSLC(0, layout(buffer_reference, buffer_reference_align = T_ALIGN) buffer DataBuffer { );
254  GLSLC(1, DTYPE v[]; );
255  GLSLC(0, }; );
256  GLSLC(0, );
257  GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
258  GLSLC(1, uvec4 width; );
259  GLSLC(1, uvec4 height; );
260  GLSLC(1, uvec4 ws_stride; );
261  GLSLC(1, ivec4 patch_size; );
262  GLSLC(1, vec4 strength; );
263  GLSLC(1, DataBuffer integral_base; );
264  GLSLC(1, uint64_t integral_size; );
265  GLSLC(1, uint64_t int_stride; );
266  GLSLC(1, uint xyoffs_start; );
267  GLSLC(0, }; );
268  GLSLC(0, );
269 
270  ff_vk_add_push_constant(pl, 0, sizeof(HorizontalPushData), VK_SHADER_STAGE_COMPUTE_BIT);
271 
272  desc_set = (FFVulkanDescriptorSetBinding []) {
273  {
274  .name = "input_img",
275  .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
276  .dimensions = 2,
277  .elems = planes,
278  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
279  .samplers = DUP_SAMPLER(sampler),
280  },
281  {
282  .name = "weights_buffer_0",
283  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
284  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
285  .buf_content = "float weights_0[];",
286  },
287  {
288  .name = "sums_buffer_0",
289  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
290  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
291  .buf_content = "float sums_0[];",
292  },
293  {
294  .name = "weights_buffer_1",
295  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
296  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
297  .buf_content = "float weights_1[];",
298  },
299  {
300  .name = "sums_buffer_1",
301  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
302  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
303  .buf_content = "float sums_1[];",
304  },
305  {
306  .name = "weights_buffer_2",
307  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
308  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
309  .buf_content = "float weights_2[];",
310  },
311  {
312  .name = "sums_buffer_2",
313  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
314  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
315  .buf_content = "float sums_2[];",
316  },
317  {
318  .name = "weights_buffer_3",
319  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
320  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
321  .buf_content = "float weights_3[];",
322  },
323  {
324  .name = "sums_buffer_3",
325  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
326  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
327  .buf_content = "float sums_3[];",
328  },
329  };
330  RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc_set, 1 + 2*desc->nb_components, 0, 0));
331 
332  desc_set = (FFVulkanDescriptorSetBinding []) {
333  {
334  .name = "xyoffsets_buffer",
335  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
336  .mem_quali = "readonly",
337  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
338  .buf_content = "ivec2 xyoffsets[];",
339  },
340  };
341  RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc_set, 1, 1, 0));
342 
343  GLSLC(0, );
344  GLSLC(0, void main() );
345  GLSLC(0, { );
346  GLSLC(1, uint64_t offset; );
347  GLSLC(1, DataBuffer dst; );
348  GLSLC(1, float s1; );
349  GLSLC(1, DTYPE s2; );
350  GLSLC(1, DTYPE prefix_sum; );
351  GLSLF(1, DTYPE psum[%i]; ,*nb_rows);
352  GLSLC(1, int r; );
353  GLSLC(1, ivec2 pos; );
354  GLSLC(1, int p; );
355  GLSLC(0, );
356  GLSLC(1, DataBuffer integral_data; );
357  GLSLF(1, ivec2 offs[%i]; ,TYPE_ELEMS);
358  GLSLC(0, );
359  GLSLC(1, int invoc_idx = int(gl_WorkGroupID.z); );
360  GLSLC(0, );
361  GLSLC(1, offset = integral_size * invoc_idx; );
362  GLSLC(1, integral_data = DataBuffer(uint64_t(integral_base) + offset); );
363  for (int i = 0; i < TYPE_ELEMS; i++)
364  GLSLF(1, offs[%i] = xyoffsets[xyoffs_start + %i*invoc_idx + %i]; ,i,TYPE_ELEMS,i);
365  GLSLC(0, );
366  GLSLC(1, DTYPE a; );
367  GLSLC(1, DTYPE b; );
368  GLSLC(1, DTYPE c; );
369  GLSLC(1, DTYPE d; );
370  GLSLC(0, );
371  GLSLC(1, DTYPE patch_diff; );
372  if (TYPE_ELEMS == 4) {
373  GLSLC(1, vec4 src; );
374  GLSLC(1, vec4 w; );
375  } else {
376  GLSLC(1, vec4 src[4]; );
377  GLSLC(1, vec4 w[4]; );
378  }
379  GLSLC(1, float w_sum; );
380  GLSLC(1, float sum; );
381  GLSLC(0, );
382  GLSLC(1, bool lt; );
383  GLSLC(1, bool gt; );
384  GLSLC(0, );
385 
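/* Per component: emit both prefix-sum passes (scan order chosen from the
 * frame's aspect ratio) followed by the weights accumulation for this batch
 * of offsets. */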
386  for (int i = 0; i < desc->nb_components; i++) {
387  int off = desc->comp[i].offset / (FFALIGN(desc->comp[i].depth, 8)/8);
388  if (width >= height) {
389  insert_horizontal_pass(shd, *nb_rows, 1, desc->comp[i].plane, off);
390  insert_vertical_pass(shd, *nb_rows, 0, desc->comp[i].plane, off);
391  insert_weights_pass(shd, *nb_rows, 0, t, i, desc->comp[i].plane, off);
392  } else {
393  insert_vertical_pass(shd, *nb_rows, 1, desc->comp[i].plane, off);
394  insert_horizontal_pass(shd, *nb_rows, 0, desc->comp[i].plane, off);
395  insert_weights_pass(shd, *nb_rows, 1, t, i, desc->comp[i].plane, off);
396  }
397  }
398 
399  GLSLC(0, } );
400 
401  RET(spv->compile_shader(spv, vkctx, shd, &spv_data, &spv_len, "main", &spv_opaque));
402  RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
403 
404  RET(ff_vk_init_compute_pipeline(vkctx, pl, shd));
405  RET(ff_vk_exec_pipeline_register(vkctx, exec, pl));
406 
407 fail:
408  if (spv_opaque)
409  spv->free_shader(spv, &spv_opaque);
410 
411  return err;
412 }
413 
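/* The denoise pipeline normalizes the accumulated results: for each pixel it
 * divides the weighted sum (plus the centre pixel itself, with weight 1) by
 * the total weight and writes the result to the output image. */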
414 typedef struct DenoisePushData {
415  uint32_t ws_stride[4];
416 } DenoisePushData;
417 
418 static av_cold int init_denoise_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec,
419  FFVulkanPipeline *pl, FFVkSPIRVShader *shd,
420  VkSampler sampler, FFVkSPIRVCompiler *spv,
421  const AVPixFmtDescriptor *desc, int planes)
422 {
423  int err;
424  uint8_t *spv_data;
425  size_t spv_len;
426  void *spv_opaque = NULL;
427  FFVulkanDescriptorSetBinding *desc_set;
428 
429  RET(ff_vk_shader_init(pl, shd, "nlmeans_denoise",
430  VK_SHADER_STAGE_COMPUTE_BIT, 0));
431 
432  ff_vk_shader_set_compute_sizes(shd, 32, 32, 1);
433 
434  GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
435  GLSLC(1, uvec4 ws_stride; );
436  GLSLC(0, }; );
437 
438  ff_vk_add_push_constant(pl, 0, sizeof(DenoisePushData), VK_SHADER_STAGE_COMPUTE_BIT);
439 
440  desc_set = (FFVulkanDescriptorSetBinding []) {
441  {
442  .name = "input_img",
443  .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
444  .dimensions = 2,
445  .elems = planes,
446  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
447  .samplers = DUP_SAMPLER(sampler),
448  },
449  {
450  .name = "output_img",
451  .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
452  .mem_layout = ff_vk_shader_rep_fmt(vkctx->output_format),
453  .mem_quali = "writeonly",
454  .dimensions = 2,
455  .elems = planes,
456  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
457  },
458  {
459  .name = "weights_buffer_0",
460  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
461  .mem_quali = "readonly",
462  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
463  .buf_content = "float weights_0[];",
464  },
465  {
466  .name = "sums_buffer_0",
467  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
468  .mem_quali = "readonly",
469  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
470  .buf_content = "float sums_0[];",
471  },
472  {
473  .name = "weights_buffer_1",
474  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
475  .mem_quali = "readonly",
476  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
477  .buf_content = "float weights_1[];",
478  },
479  {
480  .name = "sums_buffer_1",
481  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
482  .mem_quali = "readonly",
483  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
484  .buf_content = "float sums_1[];",
485  },
486  {
487  .name = "weights_buffer_2",
488  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
489  .mem_quali = "readonly",
490  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
491  .buf_content = "float weights_2[];",
492  },
493  {
494  .name = "sums_buffer_2",
495  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
496  .mem_quali = "readonly",
497  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
498  .buf_content = "float sums_2[];",
499  },
500  {
501  .name = "weights_buffer_3",
502  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
503  .mem_quali = "readonly",
504  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
505  .buf_content = "float weights_3[];",
506  },
507  {
508  .name = "sums_buffer_3",
509  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
510  .mem_quali = "readonly",
511  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
512  .buf_content = "float sums_3[];",
513  },
514  };
515  RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc_set, 2 + 2*desc->nb_components, 0, 0));
516 
517  GLSLC(0, void main() );
518  GLSLC(0, { );
519  GLSLC(1, ivec2 size; );
520  GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
521  GLSLC(1, const uint plane = uint(gl_WorkGroupID.z); );
522  GLSLC(0, );
523  GLSLC(1, float w_sum; );
524  GLSLC(1, float sum; );
525  GLSLC(1, vec4 src; );
526  GLSLC(1, vec4 r; );
527  GLSLC(0, );
528  GLSLC(1, size = imageSize(output_img[plane]); );
529  GLSLC(1, if (!IS_WITHIN(pos, size)) );
530  GLSLC(2, return; );
531  GLSLC(0, );
532  GLSLC(1, src = texture(input_img[plane], pos); );
533  GLSLC(0, );
534  for (int c = 0; c < desc->nb_components; c++) {
535  int off = desc->comp[c].offset / (FFALIGN(desc->comp[c].depth, 8)/8);
536  GLSLF(1, if (plane == %i) { ,desc->comp[c].plane);
537  GLSLF(2, w_sum = weights_%i[pos.y*ws_stride[%i] + pos.x]; ,c, c);
538  GLSLF(2, sum = sums_%i[pos.y*ws_stride[%i] + pos.x]; ,c, c);
539  GLSLF(2, r[%i] = (sum + src[%i]*255) / (1.0 + w_sum) / 255; ,off, off);
540  GLSLC(1, } );
541  GLSLC(0, );
542  }
543  GLSLC(1, imageStore(output_img[plane], pos, r); );
544  GLSLC(0, } );
545 
546  RET(spv->compile_shader(spv, vkctx, shd, &spv_data, &spv_len, "main", &spv_opaque));
547  RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
548 
549  RET(ff_vk_init_compute_pipeline(vkctx, pl, shd));
550  RET(ff_vk_exec_pipeline_register(vkctx, exec, pl));
551 
552 fail:
553  if (spv_opaque)
554  spv->free_shader(spv, &spv_opaque);
555 
556  return err;
557 }
558 
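/* One-time setup: validates the options, precomputes per-component strength
 * and patch radius, generates the search-window offset list, uploads it to a
 * GPU buffer and builds both compute pipelines. */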
559 static av_cold int init_filter(AVFilterContext *ctx)
560 {
561  int rad, err;
562  int xcnt = 0, ycnt = 0;
563  NLMeansVulkanContext *s = ctx->priv;
564  FFVulkanContext *vkctx = &s->vkctx;
565  const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
566  FFVkSPIRVCompiler *spv;
567  int *offsets_buf;
568  int offsets_dispatched = 0, nb_dispatches = 0;
569 
570  const AVPixFmtDescriptor *desc;
571  desc = av_pix_fmt_desc_get(vkctx->output_format);
572  if (!desc)
573  return AVERROR(EINVAL);
574 
575  if (!(s->opts.r & 1)) {
576  s->opts.r |= 1;
577  av_log(ctx, AV_LOG_WARNING, "Research size should be odd, setting to %i\n",
578  s->opts.r);
579  }
580 
581  if (!(s->opts.p & 1)) {
582  s->opts.p |= 1;
583  av_log(ctx, AV_LOG_WARNING, "Patch size should be odd, setting to %i\n",
584  s->opts.p);
585  }
586 
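/* Per-component weight scale: squared differences are sampled as 0..1 floats,
 * so the 255^2 factor rescales them to 8-bit range, and -(10*s)^2 provides
 * the (negative) exponent denominator, i.e. 10*s acts as the NL-means h. */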
587  for (int i = 0; i < 4; i++) {
588  double str = (s->opts.sc[i] > 1.0) ? s->opts.sc[i] : s->opts.s;
589  int ps = (s->opts.pc[i] ? s->opts.pc[i] : s->opts.p);
590  str = 10.0f*str;
591  str *= -str;
592  str = 255.0*255.0 / str;
593  s->strength[i] = str;
594  if (!(ps & 1)) {
595  ps |= 1;
596  av_log(ctx, AV_LOG_WARNING, "Patch size should be odd, setting to %i\n",
597  ps);
598  }
599  s->patch[i] = ps / 2;
600  }
601 
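/* Enumerate every (x, y) offset in the research window except the centre and
 * upload the list as int32 pairs into a host-visible storage buffer bound to
 * the weights pipeline. */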
602  rad = s->opts.r/2;
603  s->nb_offsets = (2*rad + 1)*(2*rad + 1) - 1;
604  s->xoffsets = av_malloc(s->nb_offsets*sizeof(*s->xoffsets));
605  s->yoffsets = av_malloc(s->nb_offsets*sizeof(*s->yoffsets));
606  s->nb_offsets = 0;
607 
608  for (int x = -rad; x <= rad; x++) {
609  for (int y = -rad; y <= rad; y++) {
610  if (!x && !y)
611  continue;
612 
613  s->xoffsets[xcnt++] = x;
614  s->yoffsets[ycnt++] = y;
615  s->nb_offsets++;
616  }
617  }
618 
619  RET(ff_vk_create_buf(&s->vkctx, &s->xyoffsets_buf, 2*s->nb_offsets*sizeof(int32_t), NULL, NULL,
620  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
621  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
622  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
623  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
624  RET(ff_vk_map_buffer(&s->vkctx, &s->xyoffsets_buf, (uint8_t **)&offsets_buf, 0));
625 
626  for (int i = 0; i < 2*s->nb_offsets; i += 2) {
627  offsets_buf[i + 0] = s->xoffsets[i >> 1];
628  offsets_buf[i + 1] = s->yoffsets[i >> 1];
629  }
630 
631  RET(ff_vk_unmap_buffer(&s->vkctx, &s->xyoffsets_buf, 1));
632 
633  s->opts.t = FFMIN(s->opts.t, (FFALIGN(s->nb_offsets, TYPE_ELEMS) / TYPE_ELEMS));
634  if (!vkctx->atomic_float_feats.shaderBufferFloat32AtomicAdd) {
635  av_log(ctx, AV_LOG_WARNING, "Device doesn't support atomic float adds, "
636  "disabling dispatch parallelism\n");
637  s->opts.t = 1;
638  }
639 
640  spv = ff_vk_spirv_init();
641  if (!spv) {
642  av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
643  return AVERROR_EXTERNAL;
644  }
645 
646  ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
647  RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, 1, 0, 0, 0, NULL));
648  RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST));
649 
650  RET(init_weights_pipeline(vkctx, &s->e, &s->pl_weights, &s->shd_weights, s->sampler,
651  spv, s->vkctx.output_width, s->vkctx.output_height,
652  s->opts.t, desc, planes, &s->pl_weights_rows));
653 
654  RET(init_denoise_pipeline(vkctx, &s->e, &s->pl_denoise, &s->shd_denoise, s->sampler,
655  spv, desc, planes));
656 
657  RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_weights, NULL, 1, 0, 0,
658  s->xyoffsets_buf.address, s->xyoffsets_buf.size,
659  VK_FORMAT_UNDEFINED));
660 
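/* Dry run of the dispatch loop, purely to count how many dispatches the
 * offset list will need at the configured parallelism for the log message
 * below. */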
661  do {
662  int wg_invoc = FFMIN((s->nb_offsets - offsets_dispatched)/TYPE_ELEMS, s->opts.t);
663  wg_invoc = FFMIN(wg_invoc, vkctx->props.properties.limits.maxComputeWorkGroupCount[2]);
664  offsets_dispatched += wg_invoc * TYPE_ELEMS;
665  nb_dispatches++;
666  } while (offsets_dispatched < s->nb_offsets);
667 
668  av_log(ctx, AV_LOG_VERBOSE, "Filter initialized, %i x/y offsets, %i dispatches\n",
669  s->nb_offsets, nb_dispatches);
670 
671  s->initialized = 1;
672 
673 fail:
674  if (spv)
675  spv->uninit(&spv);
676 
677  return err;
678 }
679 
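/* Records the denoise (normalization) dispatch: a barrier makes the weights
 * shader's storage writes visible, then the whole frame is processed with one
 * workgroup per 32x32 tile per plane. */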
680 static int denoise_pass(NLMeansVulkanContext *s, FFVkExecContext *exec,
681  FFVkBuffer *ws_vk, uint32_t ws_stride[4])
682 {
683  FFVulkanContext *vkctx = &s->vkctx;
684  FFVulkanFunctions *vk = &vkctx->vkfn;
685  VkBufferMemoryBarrier2 buf_bar[8];
686  int nb_buf_bar = 0;
687 
688  /* Denoise pass pipeline */
689  ff_vk_exec_bind_pipeline(vkctx, exec, &s->pl_denoise);
690 
691  /* Push data */
692  ff_vk_update_push_exec(vkctx, exec, &s->pl_denoise, VK_SHADER_STAGE_COMPUTE_BIT,
693  0, sizeof(DenoisePushData), &(DenoisePushData) {
694  { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] },
695  });
696 
697  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
698  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
699  .srcStageMask = ws_vk->stage,
700  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
701  .srcAccessMask = ws_vk->access,
702  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT,
703  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
704  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
705  .buffer = ws_vk->buf,
706  .size = ws_vk->size,
707  .offset = 0,
708  };
709 
710  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
711  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
712  .pBufferMemoryBarriers = buf_bar,
713  .bufferMemoryBarrierCount = nb_buf_bar,
714  });
715  ws_vk->stage = buf_bar[0].dstStageMask;
716  ws_vk->access = buf_bar[0].dstAccessMask;
717 
718  /* End of denoise pass */
719  vk->CmdDispatch(exec->buf,
720  FFALIGN(vkctx->output_width, s->pl_denoise.wg_size[0])/s->pl_denoise.wg_size[0],
721  FFALIGN(vkctx->output_height, s->pl_denoise.wg_size[1])/s->pl_denoise.wg_size[1],
722  av_pix_fmt_count_planes(s->vkctx.output_format));
723 
724  return 0;
725 }
726 
727 static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
728 {
729  int err;
730  AVFrame *out = NULL;
731  AVFilterContext *ctx = link->dst;
732  NLMeansVulkanContext *s = ctx->priv;
733  AVFilterLink *outlink = ctx->outputs[0];
734  FFVulkanContext *vkctx = &s->vkctx;
735  FFVulkanFunctions *vk = &vkctx->vkfn;
736 
737  const AVPixFmtDescriptor *desc;
738  int plane_widths[4];
739  int plane_heights[4];
740 
741  int offsets_dispatched = 0;
742 
743  /* Integral */
744  AVBufferRef *integral_buf = NULL;
745  FFVkBuffer *integral_vk;
746  size_t int_stride;
747  size_t int_size;
748 
749  /* Weights/sums */
750  AVBufferRef *ws_buf = NULL;
751  FFVkBuffer *ws_vk;
752  VkDeviceAddress weights_addr[4];
753  VkDeviceAddress sums_addr[4];
754  uint32_t ws_stride[4];
755  size_t ws_size[4];
756  size_t ws_total_size = 0;
757 
758  FFVkExecContext *exec;
759  VkImageView in_views[AV_NUM_DATA_POINTERS];
760  VkImageView out_views[AV_NUM_DATA_POINTERS];
761  VkImageMemoryBarrier2 img_bar[8];
762  int nb_img_bar = 0;
763  VkBufferMemoryBarrier2 buf_bar[8];
764  int nb_buf_bar = 0;
765 
766  if (!s->initialized)
767  RET(init_filter(ctx));
768  RET(init_filter(ctx));
769  desc = av_pix_fmt_desc_get(vkctx->output_format);
770  if (!desc)
771  return AVERROR(EINVAL);
772 
773  /* Integral image */
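/* One integral image per parallel dispatch; it is a square region of
 * wg_size * rows_per_invocation vec4 texels on each side, which by
 * construction covers the largest plane dimension. */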
774  int_stride = s->pl_weights.wg_size[0]*s->pl_weights_rows*TYPE_SIZE;
775  int_size = s->pl_weights.wg_size[0]*s->pl_weights_rows*int_stride;
776 
777  /* Plane dimensions */
778  for (int i = 0; i < desc->nb_components; i++) {
779  plane_widths[i] = !i || (i == 3) ? vkctx->output_width : AV_CEIL_RSHIFT(vkctx->output_width, desc->log2_chroma_w);
780  plane_heights[i] = !i || (i == 3) ? vkctx->output_height : AV_CEIL_RSHIFT(vkctx->output_height, desc->log2_chroma_h);
781  plane_widths[i] = FFALIGN(plane_widths[i], s->pl_denoise.wg_size[0]);
782  plane_heights[i] = FFALIGN(plane_heights[i], s->pl_denoise.wg_size[1]);
783 
784  ws_stride[i] = plane_widths[i];
785  ws_size[i] = ws_stride[i] * plane_heights[i] * sizeof(float);
786  ws_total_size += ws_size[i];
787  }
788 
789  /* Buffers */
790  err = ff_vk_get_pooled_buffer(&s->vkctx, &s->integral_buf_pool, &integral_buf,
791  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
792  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
793  NULL,
794  s->opts.t * int_size,
795  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
796  if (err < 0)
797  return err;
798  integral_vk = (FFVkBuffer *)integral_buf->data;
799 
800  err = ff_vk_get_pooled_buffer(&s->vkctx, &s->ws_buf_pool, &ws_buf,
801  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
802  VK_BUFFER_USAGE_TRANSFER_DST_BIT |
803  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
804  NULL,
805  ws_total_size * 2,
806  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
807  if (err < 0)
808  return err;
809  ws_vk = (FFVkBuffer *)ws_buf->data;
810 
811  weights_addr[0] = ws_vk->address;
812  sums_addr[0] = ws_vk->address + ws_total_size;
813  for (int i = 1; i < desc->nb_components; i++) {
814  weights_addr[i] = weights_addr[i - 1] + ws_size[i - 1];
815  sums_addr[i] = sums_addr[i - 1] + ws_size[i - 1];
816  }
817 
818  /* Output frame */
819  out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
820  if (!out) {
821  err = AVERROR(ENOMEM);
822  goto fail;
823  }
824 
825  /* Execution context */
826  exec = ff_vk_exec_get(&s->e);
827  ff_vk_exec_start(vkctx, exec);
828 
829  /* Dependencies */
830  RET(ff_vk_exec_add_dep_frame(vkctx, exec, in,
831  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
832  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
833  RET(ff_vk_exec_add_dep_frame(vkctx, exec, out,
834  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
835  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
836 
837  RET(ff_vk_exec_add_dep_buf(vkctx, exec, &integral_buf, 1, 0));
838  integral_buf = NULL;
839 
840  RET(ff_vk_exec_add_dep_buf(vkctx, exec, &ws_buf, 1, 0));
841  ws_buf = NULL;
842 
843  /* Input frame prep */
844  RET(ff_vk_create_imageviews(vkctx, exec, in_views, in));
845  ff_vk_update_descriptor_img_array(vkctx, &s->pl_weights, exec, in, in_views, 0, 0,
846  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
847  s->sampler);
848  ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar,
849  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
850  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
851  VK_ACCESS_SHADER_READ_BIT,
852  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
853  VK_QUEUE_FAMILY_IGNORED);
854 
855  /* Output frame prep */
856  RET(ff_vk_create_imageviews(vkctx, exec, out_views, out));
857  ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar,
858  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
859  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
860  VK_ACCESS_SHADER_WRITE_BIT,
861  VK_IMAGE_LAYOUT_GENERAL,
862  VK_QUEUE_FAMILY_IGNORED);
863 
864  nb_buf_bar = 0;
865  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
866  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
867  .srcStageMask = ws_vk->stage,
868  .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
869  .srcAccessMask = ws_vk->access,
870  .dstAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
871  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
872  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
873  .buffer = ws_vk->buf,
874  .size = ws_vk->size,
875  .offset = 0,
876  };
877  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
878  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
879  .srcStageMask = integral_vk->stage,
880  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
881  .srcAccessMask = integral_vk->access,
882  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
883  VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
884  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
885  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
886  .buffer = integral_vk->buf,
887  .size = integral_vk->size,
888  .offset = 0,
889  };
890 
891  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
892  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
893  .pImageMemoryBarriers = img_bar,
894  .imageMemoryBarrierCount = nb_img_bar,
895  .pBufferMemoryBarriers = buf_bar,
896  .bufferMemoryBarrierCount = nb_buf_bar,
897  });
898  ws_vk->stage = buf_bar[0].dstStageMask;
899  ws_vk->access = buf_bar[0].dstAccessMask;
900  integral_vk->stage = buf_bar[1].dstStageMask;
901  integral_vk->access = buf_bar[1].dstAccessMask;
902 
903  /* Buffer zeroing */
904  vk->CmdFillBuffer(exec->buf, ws_vk->buf, 0, ws_vk->size, 0x0);
905 
906  nb_buf_bar = 0;
907  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
908  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
909  .srcStageMask = ws_vk->stage,
910  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
911  .srcAccessMask = ws_vk->access,
912  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
913  VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
914  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
915  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
916  .buffer = ws_vk->buf,
917  .size = ws_vk->size,
918  .offset = 0,
919  };
920 
921  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
922  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
923  .pBufferMemoryBarriers = buf_bar,
924  .bufferMemoryBarrierCount = nb_buf_bar,
925  });
926  ws_vk->stage = buf_bar[0].dstStageMask;
927  ws_vk->access = buf_bar[0].dstAccessMask;
928 
929  /* Update weights descriptors */
930  ff_vk_update_descriptor_img_array(vkctx, &s->pl_weights, exec, in, in_views, 0, 0,
931  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
932  s->sampler);
933  for (int i = 0; i < desc->nb_components; i++) {
934  RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_weights, exec, 0, 1 + i*2 + 0, 0,
935  weights_addr[i], ws_size[i],
936  VK_FORMAT_UNDEFINED));
937  RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_weights, exec, 0, 1 + i*2 + 1, 0,
938  sums_addr[i], ws_size[i],
939  VK_FORMAT_UNDEFINED));
940  }
941 
942  /* Update denoise descriptors */
943  ff_vk_update_descriptor_img_array(vkctx, &s->pl_denoise, exec, in, in_views, 0, 0,
944  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
945  s->sampler);
946  ff_vk_update_descriptor_img_array(vkctx, &s->pl_denoise, exec, out, out_views, 0, 1,
947  VK_IMAGE_LAYOUT_GENERAL, s->sampler);
948  for (int i = 0; i < desc->nb_components; i++) {
949  RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_denoise, exec, 0, 2 + i*2 + 0, 0,
950  weights_addr[i], ws_size[i],
951  VK_FORMAT_UNDEFINED));
952  RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_denoise, exec, 0, 2 + i*2 + 1, 0,
953  sums_addr[i], ws_size[i],
954  VK_FORMAT_UNDEFINED));
955  }
956 
957  /* Weights pipeline */
958  ff_vk_exec_bind_pipeline(vkctx, exec, &s->pl_weights);
959 
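/* Weights accumulation: each loop iteration dispatches up to t workgroups on
 * the Z axis, one per batch of TYPE_ELEMS offsets; between iterations a
 * barrier orders reuse of the integral-image buffer. */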
960  do {
961  int wg_invoc;
962  HorizontalPushData pd = {
963  { plane_widths[0], plane_widths[1], plane_widths[2], plane_widths[3] },
964  { plane_heights[0], plane_heights[1], plane_heights[2], plane_heights[3] },
965  { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] },
966  { s->patch[0], s->patch[1], s->patch[2], s->patch[3] },
967  { s->strength[0], s->strength[1], s->strength[2], s->strength[3], },
968  integral_vk->address,
969  (uint64_t)int_size,
970  (uint64_t)int_stride,
971  offsets_dispatched,
972  };
973 
974  if (offsets_dispatched) {
975  nb_buf_bar = 0;
976  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
977  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
978  .srcStageMask = integral_vk->stage,
979  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
980  .srcAccessMask = integral_vk->access,
981  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
982  VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
983  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
984  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
985  .buffer = integral_vk->buf,
986  .size = integral_vk->size,
987  .offset = 0,
988  };
989 
990  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
991  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
992  .pBufferMemoryBarriers = buf_bar,
993  .bufferMemoryBarrierCount = nb_buf_bar,
994  });
995  integral_vk->stage = buf_bar[0].dstStageMask;
996  integral_vk->access = buf_bar[0].dstAccessMask;
997  }
998 
999  /* Push data */
1000  ff_vk_update_push_exec(vkctx, exec, &s->pl_weights, VK_SHADER_STAGE_COMPUTE_BIT,
1001  0, sizeof(pd), &pd);
1002 
1003  wg_invoc = FFMIN((s->nb_offsets - offsets_dispatched)/TYPE_ELEMS, s->opts.t);
1004  wg_invoc = FFMIN(wg_invoc, vkctx->props.properties.limits.maxComputeWorkGroupCount[2]);
1005 
1006  /* End of horizontal pass */
1007  vk->CmdDispatch(exec->buf, 1, 1, wg_invoc);
1008 
1009  offsets_dispatched += wg_invoc * TYPE_ELEMS;
1010  } while (offsets_dispatched < s->nb_offsets);
1011 
1012  RET(denoise_pass(s, exec, ws_vk, ws_stride));
1013 
1014  err = ff_vk_exec_submit(vkctx, exec);
1015  if (err < 0)
1016  return err;
1017 
1018  err = av_frame_copy_props(out, in);
1019  if (err < 0)
1020  goto fail;
1021 
1022  av_frame_free(&in);
1023 
1024  return ff_filter_frame(outlink, out);
1025 
1026 fail:
1027  av_buffer_unref(&integral_buf);
1028  av_buffer_unref(&ws_buf);
1029  av_frame_free(&in);
1030  av_frame_free(&out);
1031  return err;
1032 }
1033 
1034 static void nlmeans_vulkan_uninit(AVFilterContext *avctx)
1035 {
1036  NLMeansVulkanContext *s = avctx->priv;
1037  FFVulkanContext *vkctx = &s->vkctx;
1038  FFVulkanFunctions *vk = &vkctx->vkfn;
1039 
1040  ff_vk_exec_pool_free(vkctx, &s->e);
1041  ff_vk_pipeline_free(vkctx, &s->pl_weights);
1042  ff_vk_shader_free(vkctx, &s->shd_weights);
1043  ff_vk_pipeline_free(vkctx, &s->pl_denoise);
1044  ff_vk_shader_free(vkctx, &s->shd_denoise);
1045 
1046  av_buffer_pool_uninit(&s->integral_buf_pool);
1047  av_buffer_pool_uninit(&s->ws_buf_pool);
1048 
1049  if (s->sampler)
1050  vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
1051  vkctx->hwctx->alloc);
1052 
1053  ff_vk_uninit(&s->vkctx);
1054 
1055  av_freep(&s->xoffsets);
1056  av_freep(&s->yoffsets);
1057 
1058  s->initialized = 0;
1059 }
1060 
1061 #define OFFSET(x) offsetof(NLMeansVulkanContext, x)
1062 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
1063 static const AVOption nlmeans_vulkan_options[] = {
1064  { "s", "denoising strength for all components", OFFSET(opts.s), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
1065  { "p", "patch size for all components", OFFSET(opts.p), AV_OPT_TYPE_INT, { .i64 = 3*2+1 }, 0, 99, FLAGS },
1066  { "r", "research window radius", OFFSET(opts.r), AV_OPT_TYPE_INT, { .i64 = 7*2+1 }, 0, 99, FLAGS },
1067  { "t", "parallelism", OFFSET(opts.t), AV_OPT_TYPE_INT, { .i64 = 36 }, 1, 168, FLAGS },
1068 
1069  { "s1", "denoising strength for component 1", OFFSET(opts.sc[0]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
1070  { "s2", "denoising strength for component 2", OFFSET(opts.sc[1]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
1071  { "s3", "denoising strength for component 3", OFFSET(opts.sc[2]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
1072  { "s4", "denoising strength for component 4", OFFSET(opts.sc[3]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
1073 
1074  { "p1", "patch size for component 1", OFFSET(opts.pc[0]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
1075  { "p2", "patch size for component 2", OFFSET(opts.pc[1]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
1076  { "p3", "patch size for component 3", OFFSET(opts.pc[2]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
1077  { "p4", "patch size for component 4", OFFSET(opts.pc[3]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
1078 
1079  { NULL }
1080 };
1081 
1082 AVFILTER_DEFINE_CLASS(nlmeans_vulkan);
1083 
1084 static const AVFilterPad nlmeans_vulkan_inputs[] = {
1085  {
1086  .name = "default",
1087  .type = AVMEDIA_TYPE_VIDEO,
1088  .filter_frame = &nlmeans_vulkan_filter_frame,
1089  .config_props = &ff_vk_filter_config_input,
1090  },
1091 };
1092 
1093 static const AVFilterPad nlmeans_vulkan_outputs[] = {
1094  {
1095  .name = "default",
1096  .type = AVMEDIA_TYPE_VIDEO,
1097  .config_props = &ff_vk_filter_config_output,
1098  },
1099 };
1100 
1101 const AVFilter ff_vf_nlmeans_vulkan = {
1102  .name = "nlmeans_vulkan",
1103  .description = NULL_IF_CONFIG_SMALL("Non-local means denoiser (Vulkan)"),
1104  .priv_size = sizeof(NLMeansVulkanContext),
1105  .init = &ff_vk_filter_init,
1106  .uninit = &nlmeans_vulkan_uninit,
1107  FILTER_INPUTS(nlmeans_vulkan_inputs),
1108  FILTER_OUTPUTS(nlmeans_vulkan_outputs),
1109  FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN),
1110  .priv_class = &nlmeans_vulkan_class,
1111  .flags = AVFILTER_FLAG_HWDEVICE,
1112  .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
1113 };