vf_nlmeans_vulkan.c
1 /*
2  * Copyright (c) Lynne
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/mem.h"
22 #include "libavutil/random_seed.h"
23 #include "libavutil/opt.h"
24 #include "vulkan_filter.h"
25 #include "vulkan_spirv.h"
26 
27 #include "filters.h"
28 #include "video.h"
29 
30 #define TYPE_NAME "vec4"
31 #define TYPE_ELEMS 4
32 #define TYPE_SIZE (TYPE_ELEMS*4)
33 
34 typedef struct NLMeansVulkanContext {
35  FFVulkanContext vkctx;
36 
37  int initialized;
38  FFVkExecPool e;
39  FFVkQueueFamilyCtx qf;
40  VkSampler sampler;
41 
42  AVBufferPool *integral_buf_pool;
43  AVBufferPool *ws_buf_pool;
44 
45  FFVkBuffer xyoffsets_buf;
46 
47  int pl_weights_rows;
48  FFVulkanPipeline pl_weights;
49  FFVkSPIRVShader shd_weights;
50 
51  FFVulkanPipeline pl_denoise;
52  FFVkSPIRVShader shd_denoise;
53 
54  int *xoffsets;
55  int *yoffsets;
56  int nb_offsets;
57  float strength[4];
58  int patch[4];
59 
60  struct nlmeans_opts {
61  int r;
62  double s;
63  double sc[4];
64  int p;
65  int pc[4];
66  int t;
67  } opts;
68 } NLMeansVulkanContext;
69 
70 extern const char *ff_source_prefix_sum_comp;
71 
72 static void insert_first(FFVkSPIRVShader *shd, int r, const char *off, int horiz, int plane, int comp)
73 {
74  GLSLF(4, s1 = texture(input_img[%i], pos + ivec2(%i + %s, %i + %s))[%i];
75  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
76 
77  GLSLF(4, s2[0] = texture(input_img[%i], pos + offs[0] + ivec2(%i + %s, %i + %s))[%i];
78  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
79  GLSLF(4, s2[1] = texture(input_img[%i], pos + offs[1] + ivec2(%i + %s, %i + %s))[%i];
80  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
81  GLSLF(4, s2[2] = texture(input_img[%i], pos + offs[2] + ivec2(%i + %s, %i + %s))[%i];
82  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
83  GLSLF(4, s2[3] = texture(input_img[%i], pos + offs[3] + ivec2(%i + %s, %i + %s))[%i];
84  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
85 
86  GLSLC(4, s2 = (s1 - s2) * (s1 - s2); );
87 }
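/* Illustrative scalar sketch (not part of the filter source): the GLSL emitted
 * by insert_first() fetches the centre sample (s1) and four offset samples
 * (s2[0..3]) and reduces each pair to a squared difference, per component: */
static inline float nlm_sq_diff(float s1, float s2)
{
    float d = s1 - s2;  /* "s2 = (s1 - s2) * (s1 - s2)" in the shader */
    return d * d;
}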
88 
89 static void insert_horizontal_pass(FFVkSPIRVShader *shd, int nb_rows, int first, int plane, int comp)
90 {
91  GLSLF(1, pos.y = int(gl_GlobalInvocationID.x) * %i; ,nb_rows);
92  if (!first)
93  GLSLC(1, barrier(); );
94  GLSLC(0, );
95  GLSLF(1, if (pos.y < height[%i]) { ,plane);
96  GLSLC(2, #pragma unroll(1) );
97  GLSLF(2, for (r = 0; r < %i; r++) { ,nb_rows);
98  GLSLC(3, prefix_sum = DTYPE(0); );
99  GLSLC(3, offset = int_stride * uint64_t(pos.y + r); );
100  GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset); );
101  GLSLC(0, );
102  GLSLF(3, for (pos.x = 0; pos.x < width[%i]; pos.x++) { ,plane);
103  if (first)
104  insert_first(shd, 0, "r", 0, plane, comp);
105  else
106  GLSLC(4, s2 = dst.v[pos.x]; );
107  GLSLC(4, dst.v[pos.x] = s2 + prefix_sum; );
108  GLSLC(4, prefix_sum += s2; );
109  GLSLC(3, } );
110  GLSLC(2, } );
111  GLSLC(1, } );
112  GLSLC(0, );
113 }
114 
115 static void insert_vertical_pass(FFVkSPIRVShader *shd, int nb_rows, int first, int plane, int comp)
116 {
117  GLSLF(1, pos.x = int(gl_GlobalInvocationID.x) * %i; ,nb_rows);
118  GLSLC(1, #pragma unroll(1) );
119  GLSLF(1, for (r = 0; r < %i; r++) ,nb_rows);
120  GLSLC(2, psum[r] = DTYPE(0); );
121  GLSLC(0, );
122  if (!first)
123  GLSLC(1, barrier(); );
124  GLSLC(0, );
125  GLSLF(1, if (pos.x < width[%i]) { ,plane);
126  GLSLF(2, for (pos.y = 0; pos.y < height[%i]; pos.y++) { ,plane);
127  GLSLC(3, offset = int_stride * uint64_t(pos.y); );
128  GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset); );
129  GLSLC(0, );
130  GLSLC(3, #pragma unroll(1) );
131  GLSLF(3, for (r = 0; r < %i; r++) { ,nb_rows);
132  if (first)
133  insert_first(shd, 0, "r", 1, plane, comp);
134  else
135  GLSLC(4, s2 = dst.v[pos.x + r]; );
136  GLSLC(4, dst.v[pos.x + r] = s2 + psum[r]; );
137  GLSLC(4, psum[r] += s2; );
138  GLSLC(3, } );
139  GLSLC(2, } );
140  GLSLC(1, } );
141  GLSLC(0, );
142 }
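/* Illustrative CPU-side sketch of what the two passes above build on the GPU:
 * an integral image (summed-area table) of squared patch differences, formed
 * by a horizontal prefix sum followed by a vertical prefix sum. Assumes a
 * plain float buffer; the shader instead works on vec4 groups of four search
 * offsets at a time and strides through a device buffer. Not part of the
 * filter source. */
static void integral_image_sketch(float *img, int w, int h)
{
    for (int y = 0; y < h; y++)              /* horizontal prefix sum per row */
        for (int x = 1; x < w; x++)
            img[y*w + x] += img[y*w + x - 1];
    for (int x = 0; x < w; x++)              /* vertical prefix sum per column */
        for (int y = 1; y < h; y++)
            img[y*w + x] += img[(y - 1)*w + x];
}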
143 
144 static void insert_weights_pass(FFVkSPIRVShader *shd, int nb_rows, int vert,
145  int t, int dst_comp, int plane, int comp)
146 {
147  GLSLF(1, p = patch_size[%i]; ,dst_comp);
148  GLSLC(0, );
149  GLSLC(1, barrier(); );
150  GLSLC(0, );
151  if (!vert) {
152  GLSLF(1, for (pos.y = 0; pos.y < height[%i]; pos.y++) { ,plane);
153  GLSLF(2, if (gl_GlobalInvocationID.x*%i >= width[%i]) ,nb_rows, plane);
154  GLSLC(3, break; );
155  GLSLF(2, for (r = 0; r < %i; r++) { ,nb_rows);
156  GLSLF(3, pos.x = int(gl_GlobalInvocationID.x) * %i + r; ,nb_rows);
157  } else {
158  GLSLF(1, for (pos.x = 0; pos.x < width[%i]; pos.x++) { ,plane);
159  GLSLF(2, if (gl_GlobalInvocationID.x*%i >= height[%i]) ,nb_rows, plane);
160  GLSLC(3, break; );
161  GLSLF(2, for (r = 0; r < %i; r++) { ,nb_rows);
162  GLSLF(3, pos.y = int(gl_GlobalInvocationID.x) * %i + r; ,nb_rows);
163  }
164  GLSLC(0, );
165  GLSLC(3, a = DTYPE(0); );
166  GLSLC(3, b = DTYPE(0); );
167  GLSLC(3, c = DTYPE(0); );
168  GLSLC(3, d = DTYPE(0); );
169  GLSLC(0, );
170  GLSLC(3, lt = ((pos.x - p) < 0) || ((pos.y - p) < 0); );
171  GLSLC(0, );
172  GLSLF(3, src[0] = texture(input_img[%i], pos + offs[0])[%i]; ,plane, comp);
173  GLSLF(3, src[1] = texture(input_img[%i], pos + offs[1])[%i]; ,plane, comp);
174  GLSLF(3, src[2] = texture(input_img[%i], pos + offs[2])[%i]; ,plane, comp);
175  GLSLF(3, src[3] = texture(input_img[%i], pos + offs[3])[%i]; ,plane, comp);
176  GLSLC(0, );
177  GLSLC(3, if (lt == false) { );
178  GLSLC(3, offset = int_stride * uint64_t(pos.y - p); );
179  GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset); );
180  GLSLC(4, a = dst.v[pos.x - p]; );
181  GLSLC(4, c = dst.v[pos.x + p]; );
182  GLSLC(3, offset = int_stride * uint64_t(pos.y + p); );
183  GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset); );
184  GLSLC(4, b = dst.v[pos.x - p]; );
185  GLSLC(4, d = dst.v[pos.x + p]; );
186  GLSLC(3, } );
187  GLSLC(0, );
188  GLSLC(3, patch_diff = d + a - b - c; );
189  GLSLF(3, w = exp(patch_diff * strength[%i]); ,dst_comp);
190  GLSLC(3, w_sum = w[0] + w[1] + w[2] + w[3]; );
191  GLSLC(3, sum = dot(w, src*255); );
192  GLSLC(0, );
193  if (t > 1) {
194  GLSLF(3, atomicAdd(weights_%i[pos.y*ws_stride[%i] + pos.x], w_sum); ,dst_comp, dst_comp);
195  GLSLF(3, atomicAdd(sums_%i[pos.y*ws_stride[%i] + pos.x], sum); ,dst_comp, dst_comp);
196  } else {
197  GLSLF(3, weights_%i[pos.y*ws_stride[%i] + pos.x] += w_sum; ,dst_comp, dst_comp);
198  GLSLF(3, sums_%i[pos.y*ws_stride[%i] + pos.x] += sum; ,dst_comp, dst_comp);
199  }
200  GLSLC(2, } );
201  GLSLC(1, } );
202 }
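/* Illustrative scalar version of the lookup performed by the generated GLSL
 * above (not part of the filter source; requires <math.h>): with the integral
 * image in place, the sum of squared differences over a (2p+1)x(2p+1) patch
 * costs four corner reads, and the non-local-means weight is one exponential
 * using the precomputed (negative) strength from init_filter(). */
static float nlm_weight_sketch(const float *sat, int stride, int x, int y,
                               int p, float strength)
{
    float a = sat[(y - p)*stride + (x - p)];
    float b = sat[(y + p)*stride + (x - p)];
    float c = sat[(y - p)*stride + (x + p)];
    float d = sat[(y + p)*stride + (x + p)];
    float patch_diff = d + a - b - c;   /* box sum via SAT corners */
    return expf(patch_diff * strength); /* strength < 0, so weight <= 1 */
}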
203 
204 typedef struct HorizontalPushData {
205  uint32_t width[4];
206  uint32_t height[4];
207  uint32_t ws_stride[4];
208  int32_t patch_size[4];
209  float strength[4];
210  VkDeviceAddress integral_base;
211  uint64_t integral_size;
212  uint64_t int_stride;
213  uint32_t xyoffs_start;
214 } HorizontalPushData;
215 
216 static av_cold int init_weights_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec,
217  FFVulkanPipeline *pl, FFVkSPIRVShader *shd,
218  VkSampler sampler, FFVkSPIRVCompiler *spv,
219  int width, int height, int t,
220  const AVPixFmtDescriptor *desc,
221  int planes, int *nb_rows)
222 {
223  int err;
224  uint8_t *spv_data;
225  size_t spv_len;
226  void *spv_opaque = NULL;
227  FFVulkanDescriptorSetBinding *desc_set;
228  int max_dim = FFMAX(width, height);
229  uint32_t max_wg = vkctx->props.properties.limits.maxComputeWorkGroupSize[0];
230  int wg_size, wg_rows;
231 
232  /* Round the max workgroup size to the previous power of two */
233  wg_size = max_wg;
234  wg_rows = 1;
235 
236  if (max_wg > max_dim) {
237  wg_size = max_dim;
238  } else if (max_wg < max_dim) {
239  /* Make it fit */
240  while (wg_size*wg_rows < max_dim)
241  wg_rows++;
242  }
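/* Worked example of the sizing above (assuming typical device limits): with
 * maxComputeWorkGroupSize[0] = 1024 and a 1920-sample widest dimension,
 * wg_size stays at 1024 and wg_rows becomes 2, so each invocation of the
 * prefix-sum passes handles two rows (or columns). */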
243 
244  RET(ff_vk_shader_init(pl, shd, "nlmeans_weights", VK_SHADER_STAGE_COMPUTE_BIT, 0));
245  ff_vk_shader_set_compute_sizes(shd, wg_size, 1, 1);
246  *nb_rows = wg_rows;
247 
248  if (t > 1)
249  GLSLC(0, #extension GL_EXT_shader_atomic_float : require );
250  GLSLC(0, #extension GL_ARB_gpu_shader_int64 : require );
251  GLSLC(0, );
252  GLSLF(0, #define DTYPE %s ,TYPE_NAME);
253  GLSLF(0, #define T_ALIGN %i ,TYPE_SIZE);
254  GLSLC(0, );
255  GLSLC(0, layout(buffer_reference, buffer_reference_align = T_ALIGN) buffer DataBuffer { );
256  GLSLC(1, DTYPE v[]; );
257  GLSLC(0, }; );
258  GLSLC(0, );
259  GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
260  GLSLC(1, uvec4 width; );
261  GLSLC(1, uvec4 height; );
262  GLSLC(1, uvec4 ws_stride; );
263  GLSLC(1, ivec4 patch_size; );
264  GLSLC(1, vec4 strength; );
265  GLSLC(1, DataBuffer integral_base; );
266  GLSLC(1, uint64_t integral_size; );
267  GLSLC(1, uint64_t int_stride; );
268  GLSLC(1, uint xyoffs_start; );
269  GLSLC(0, }; );
270  GLSLC(0, );
271 
272  ff_vk_add_push_constant(pl, 0, sizeof(HorizontalPushData), VK_SHADER_STAGE_COMPUTE_BIT);
273 
274  desc_set = (FFVulkanDescriptorSetBinding []) {
275  {
276  .name = "input_img",
277  .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
278  .dimensions = 2,
279  .elems = planes,
280  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
281  .samplers = DUP_SAMPLER(sampler),
282  },
283  {
284  .name = "weights_buffer_0",
285  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
286  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
287  .buf_content = "float weights_0[];",
288  },
289  {
290  .name = "sums_buffer_0",
291  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
292  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
293  .buf_content = "float sums_0[];",
294  },
295  {
296  .name = "weights_buffer_1",
297  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
298  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
299  .buf_content = "float weights_1[];",
300  },
301  {
302  .name = "sums_buffer_1",
303  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
304  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
305  .buf_content = "float sums_1[];",
306  },
307  {
308  .name = "weights_buffer_2",
309  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
310  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
311  .buf_content = "float weights_2[];",
312  },
313  {
314  .name = "sums_buffer_2",
315  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
316  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
317  .buf_content = "float sums_2[];",
318  },
319  {
320  .name = "weights_buffer_3",
321  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
322  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
323  .buf_content = "float weights_3[];",
324  },
325  {
326  .name = "sums_buffer_3",
327  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
328  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
329  .buf_content = "float sums_3[];",
330  },
331  };
332  RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc_set, 1 + 2*desc->nb_components, 0, 0));
333 
334  desc_set = (FFVulkanDescriptorSetBinding []) {
335  {
336  .name = "xyoffsets_buffer",
337  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
338  .mem_quali = "readonly",
339  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
340  .buf_content = "ivec2 xyoffsets[];",
341  },
342  };
343  RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc_set, 1, 1, 0));
344 
345  GLSLC(0, );
346  GLSLC(0, void main() );
347  GLSLC(0, { );
348  GLSLC(1, uint64_t offset; );
349  GLSLC(1, DataBuffer dst; );
350  GLSLC(1, float s1; );
351  GLSLC(1, DTYPE s2; );
352  GLSLC(1, DTYPE prefix_sum; );
353  GLSLF(1, DTYPE psum[%i]; ,*nb_rows);
354  GLSLC(1, int r; );
355  GLSLC(1, ivec2 pos; );
356  GLSLC(1, int p; );
357  GLSLC(0, );
358  GLSLC(1, DataBuffer integral_data; );
359  GLSLF(1, ivec2 offs[%i]; ,TYPE_ELEMS);
360  GLSLC(0, );
361  GLSLC(1, int invoc_idx = int(gl_WorkGroupID.z); );
362  GLSLC(0, );
363  GLSLC(1, offset = integral_size * invoc_idx; );
364  GLSLC(1, integral_data = DataBuffer(uint64_t(integral_base) + offset); );
365  for (int i = 0; i < TYPE_ELEMS; i++)
366  GLSLF(1, offs[%i] = xyoffsets[xyoffs_start + %i*invoc_idx + %i]; ,i,TYPE_ELEMS,i);
367  GLSLC(0, );
368  GLSLC(1, DTYPE a; );
369  GLSLC(1, DTYPE b; );
370  GLSLC(1, DTYPE c; );
371  GLSLC(1, DTYPE d; );
372  GLSLC(0, );
373  GLSLC(1, DTYPE patch_diff; );
374  if (TYPE_ELEMS == 4) {
375  GLSLC(1, vec4 src; );
376  GLSLC(1, vec4 w; );
377  } else {
378  GLSLC(1, vec4 src[4]; );
379  GLSLC(1, vec4 w[4]; );
380  }
381  GLSLC(1, float w_sum; );
382  GLSLC(1, float sum; );
383  GLSLC(0, );
384  GLSLC(1, bool lt; );
385  GLSLC(1, bool gt; );
386  GLSLC(0, );
387 
388  for (int i = 0; i < desc->nb_components; i++) {
389  int off = desc->comp[i].offset / (FFALIGN(desc->comp[i].depth, 8)/8);
390  if (width >= height) {
391  insert_horizontal_pass(shd, *nb_rows, 1, desc->comp[i].plane, off);
392  insert_vertical_pass(shd, *nb_rows, 0, desc->comp[i].plane, off);
393  insert_weights_pass(shd, *nb_rows, 0, t, i, desc->comp[i].plane, off);
394  } else {
395  insert_vertical_pass(shd, *nb_rows, 1, desc->comp[i].plane, off);
396  insert_horizontal_pass(shd, *nb_rows, 0, desc->comp[i].plane, off);
397  insert_weights_pass(shd, *nb_rows, 1, t, i, desc->comp[i].plane, off);
398  }
399  }
400 
401  GLSLC(0, } );
402 
403  RET(spv->compile_shader(spv, vkctx, shd, &spv_data, &spv_len, "main", &spv_opaque));
404  RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
405 
406  RET(ff_vk_init_compute_pipeline(vkctx, pl, shd));
407  RET(ff_vk_exec_pipeline_register(vkctx, exec, pl));
408 
409 fail:
410  if (spv_opaque)
411  spv->free_shader(spv, &spv_opaque);
412 
413  return err;
414 }
415 
416 typedef struct DenoisePushData {
417  uint32_t ws_stride[4];
418 } DenoisePushData;
419 
420 static av_cold int init_denoise_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec,
421  FFVulkanPipeline *pl, FFVkSPIRVShader *shd,
422  VkSampler sampler, FFVkSPIRVCompiler *spv,
423  const AVPixFmtDescriptor *desc, int planes)
424 {
425  int err;
426  uint8_t *spv_data;
427  size_t spv_len;
428  void *spv_opaque = NULL;
429  FFVulkanDescriptorSetBinding *desc_set;
430 
431  RET(ff_vk_shader_init(pl, shd, "nlmeans_denoise",
432  VK_SHADER_STAGE_COMPUTE_BIT, 0));
433 
434  ff_vk_shader_set_compute_sizes(shd, 32, 32, 1);
435 
436  GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
437  GLSLC(1, uvec4 ws_stride; );
438  GLSLC(0, }; );
439 
440  ff_vk_add_push_constant(pl, 0, sizeof(DenoisePushData), VK_SHADER_STAGE_COMPUTE_BIT);
441 
442  desc_set = (FFVulkanDescriptorSetBinding []) {
443  {
444  .name = "input_img",
445  .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
446  .dimensions = 2,
447  .elems = planes,
448  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
449  .samplers = DUP_SAMPLER(sampler),
450  },
451  {
452  .name = "output_img",
453  .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
454  .mem_layout = ff_vk_shader_rep_fmt(vkctx->output_format),
455  .mem_quali = "writeonly",
456  .dimensions = 2,
457  .elems = planes,
458  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
459  },
460  };
461  RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc_set, 2, 0, 0));
462 
463  desc_set = (FFVulkanDescriptorSetBinding []) {
464  {
465  .name = "weights_buffer_0",
466  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
467  .mem_quali = "readonly",
468  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
469  .buf_content = "float weights_0[];",
470  },
471  {
472  .name = "sums_buffer_0",
473  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
474  .mem_quali = "readonly",
475  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
476  .buf_content = "float sums_0[];",
477  },
478  {
479  .name = "weights_buffer_1",
480  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
481  .mem_quali = "readonly",
482  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
483  .buf_content = "float weights_1[];",
484  },
485  {
486  .name = "sums_buffer_1",
487  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
488  .mem_quali = "readonly",
489  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
490  .buf_content = "float sums_1[];",
491  },
492  {
493  .name = "weights_buffer_2",
494  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
495  .mem_quali = "readonly",
496  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
497  .buf_content = "float weights_2[];",
498  },
499  {
500  .name = "sums_buffer_2",
501  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
502  .mem_quali = "readonly",
503  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
504  .buf_content = "float sums_2[];",
505  },
506  {
507  .name = "weights_buffer_3",
508  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
509  .mem_quali = "readonly",
510  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
511  .buf_content = "float weights_3[];",
512  },
513  {
514  .name = "sums_buffer_3",
515  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
516  .mem_quali = "readonly",
517  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
518  .buf_content = "float sums_3[];",
519  },
520  };
521 
522  RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc_set, 2*desc->nb_components, 0, 0));
523 
524  GLSLC(0, void main() );
525  GLSLC(0, { );
526  GLSLC(1, ivec2 size; );
527  GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
528  GLSLC(1, const uint plane = uint(gl_WorkGroupID.z); );
529  GLSLC(0, );
530  GLSLC(1, float w_sum; );
531  GLSLC(1, float sum; );
532  GLSLC(1, vec4 src; );
533  GLSLC(1, vec4 r; );
534  GLSLC(0, );
535  GLSLC(1, size = imageSize(output_img[plane]); );
536  GLSLC(1, if (!IS_WITHIN(pos, size)) );
537  GLSLC(2, return; );
538  GLSLC(0, );
539  GLSLC(1, src = texture(input_img[plane], pos); );
540  GLSLC(0, );
541  for (int c = 0; c < desc->nb_components; c++) {
542  int off = desc->comp[c].offset / (FFALIGN(desc->comp[c].depth, 8)/8);
543  GLSLF(1, if (plane == %i) { ,desc->comp[c].plane);
544  GLSLF(2, w_sum = weights_%i[pos.y*ws_stride[%i] + pos.x]; ,c, c);
545  GLSLF(2, sum = sums_%i[pos.y*ws_stride[%i] + pos.x]; ,c, c);
546  GLSLF(2, r[%i] = (sum + src[%i]*255) / (1.0 + w_sum) / 255; ,off, off);
547  GLSLC(1, } );
548  GLSLC(0, );
549  }
550  GLSLC(1, imageStore(output_img[plane], pos, r); );
551  GLSLC(0, } );
552 
553  RET(spv->compile_shader(spv, vkctx, shd, &spv_data, &spv_len, "main", &spv_opaque));
554  RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
555 
556  RET(ff_vk_init_compute_pipeline(vkctx, pl, shd));
557  RET(ff_vk_exec_pipeline_register(vkctx, exec, pl));
558 
559 fail:
560  if (spv_opaque)
561  spv->free_shader(spv, &spv_opaque);
562 
563  return err;
564 }
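/* Note on the shader generated above (a reading of the emitted GLSL, not an
 * original comment): per component it computes
 *   out = (sum + 255*src) / (1 + w_sum) / 255,
 * i.e. the weighted average of all examined candidate pixels with the source
 * pixel included at weight 1, carried out in 8-bit units because the weights
 * pass accumulated sums as dot(w, src*255). */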
565 
566 static av_cold int init_filter(AVFilterContext *ctx)
567 {
568  int rad, err;
569  int xcnt = 0, ycnt = 0;
570  NLMeansVulkanContext *s = ctx->priv;
571  FFVulkanContext *vkctx = &s->vkctx;
572  const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
573  FFVkSPIRVCompiler *spv = NULL;
574  int *offsets_buf;
575  int offsets_dispatched = 0, nb_dispatches = 0;
576 
577  const AVPixFmtDescriptor *desc;
578  desc = av_pix_fmt_desc_get(vkctx->output_format);
579  if (!desc)
580  return AVERROR(EINVAL);
581 
582  if (!(s->opts.r & 1)) {
583  s->opts.r |= 1;
584  av_log(ctx, AV_LOG_WARNING, "Research size should be odd, setting to %i",
585  s->opts.r);
586  }
587 
588  if (!(s->opts.p & 1)) {
589  s->opts.p |= 1;
590  av_log(ctx, AV_LOG_WARNING, "Patch size should be odd, setting to %i",
591  s->opts.p);
592  }
593 
594  for (int i = 0; i < 4; i++) {
595  double str = (s->opts.sc[i] > 1.0) ? s->opts.sc[i] : s->opts.s;
596  int ps = (s->opts.pc[i] ? s->opts.pc[i] : s->opts.p);
597  str = 10.0f*str;
598  str *= -str;
599  str = 255.0*255.0 / str;
600  s->strength[i] = str;
601  if (!(ps & 1)) {
602  ps |= 1;
603  av_log(ctx, AV_LOG_WARNING, "Patch size should be odd, setting to %i",
604  ps);
605  }
606  s->patch[i] = ps / 2;
607  }
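/* In effect (a reading of the loop above, not an original comment): it stores
 * strength[i] = -255^2 / (10*s)^2, so the weights shader's
 * exp(patch_diff * strength) is the usual NLM kernel exp(-d^2 / h^2) with
 * h = 10*s expressed in 8-bit units, and patch[i] holds the half patch size p
 * used for the integral-image corner reads. */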
608 
609  rad = s->opts.r/2;
610  s->nb_offsets = (2*rad + 1)*(2*rad + 1) - 1;
611  s->xoffsets = av_malloc(s->nb_offsets*sizeof(*s->xoffsets));
612  s->yoffsets = av_malloc(s->nb_offsets*sizeof(*s->yoffsets));
613  s->nb_offsets = 0;
614 
615  for (int x = -rad; x <= rad; x++) {
616  for (int y = -rad; y <= rad; y++) {
617  if (!x && !y)
618  continue;
619 
620  s->xoffsets[xcnt++] = x;
621  s->yoffsets[ycnt++] = y;
622  s->nb_offsets++;
623  }
624  }
625 
626  RET(ff_vk_create_buf(&s->vkctx, &s->xyoffsets_buf, 2*s->nb_offsets*sizeof(int32_t), NULL, NULL,
627  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
628  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
629  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
630  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
631  RET(ff_vk_map_buffer(&s->vkctx, &s->xyoffsets_buf, (uint8_t **)&offsets_buf, 0));
632 
633  for (int i = 0; i < 2*s->nb_offsets; i += 2) {
634  offsets_buf[i + 0] = s->xoffsets[i >> 1];
635  offsets_buf[i + 1] = s->yoffsets[i >> 1];
636  }
637 
638  RET(ff_vk_unmap_buffer(&s->vkctx, &s->xyoffsets_buf, 1));
639 
640  s->opts.t = FFMIN(s->opts.t, (FFALIGN(s->nb_offsets, TYPE_ELEMS) / TYPE_ELEMS));
641  if (!vkctx->atomic_float_feats.shaderBufferFloat32AtomicAdd) {
642  av_log(ctx, AV_LOG_WARNING, "Device doesn't support atomic float adds, "
643  "disabling dispatch parallelism\n");
644  s->opts.t = 1;
645  }
646 
647  spv = ff_vk_spirv_init();
648  if (!spv) {
649  av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
650  return AVERROR_EXTERNAL;
651  }
652 
653  ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
654  RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, 1, 0, 0, 0, NULL));
655  RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST));
656 
657  RET(init_weights_pipeline(vkctx, &s->e, &s->pl_weights, &s->shd_weights, s->sampler,
658  spv, s->vkctx.output_width, s->vkctx.output_height,
659  s->opts.t, desc, planes, &s->pl_weights_rows));
660 
661  RET(init_denoise_pipeline(vkctx, &s->e, &s->pl_denoise, &s->shd_denoise, s->sampler,
662  spv, desc, planes));
663 
664  RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_weights, NULL, 1, 0, 0,
665  &s->xyoffsets_buf, 0, s->xyoffsets_buf.size,
666  VK_FORMAT_UNDEFINED));
667 
668  do {
669  int wg_invoc = FFMIN((s->nb_offsets - offsets_dispatched)/TYPE_ELEMS, s->opts.t);
670  wg_invoc = FFMIN(wg_invoc, vkctx->props.properties.limits.maxComputeWorkGroupCount[2]);
671  offsets_dispatched += wg_invoc * TYPE_ELEMS;
672  nb_dispatches++;
673  } while (offsets_dispatched < s->nb_offsets);
674 
675  av_log(ctx, AV_LOG_VERBOSE, "Filter initialized, %i x/y offsets, %i dispatches\n",
676  s->nb_offsets, nb_dispatches);
677 
678  s->initialized = 1;
679 
680 fail:
681  if (spv)
682  spv->uninit(&spv);
683 
684  return err;
685 }
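/* Worked example of the dispatch count reported above (assuming the device
 * supports atomic float adds, so t keeps its default of 36): r = 15 gives
 * rad = 7 and 15*15 - 1 = 224 offsets; each submission covers up to
 * t * TYPE_ELEMS = 36 * 4 = 144 of them, so a frame needs 2 dispatches
 * (144 + 80). */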
686 
687 static int denoise_pass(NLMeansVulkanContext *s, FFVkExecContext *exec,
688  FFVkBuffer *ws_vk, uint32_t ws_stride[4])
689 {
690  FFVulkanContext *vkctx = &s->vkctx;
691  FFVulkanFunctions *vk = &vkctx->vkfn;
692  VkBufferMemoryBarrier2 buf_bar[8];
693  int nb_buf_bar = 0;
694 
695  DenoisePushData pd = {
696  { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] },
697  };
698 
699  /* Denoise pass pipeline */
700  ff_vk_exec_bind_pipeline(vkctx, exec, &s->pl_denoise);
701 
702  /* Push data */
703  ff_vk_update_push_exec(vkctx, exec, &s->pl_denoise, VK_SHADER_STAGE_COMPUTE_BIT,
704  0, sizeof(pd), &pd);
705 
706  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
707  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
708  .srcStageMask = ws_vk->stage,
709  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
710  .srcAccessMask = ws_vk->access,
711  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT,
712  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
713  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
714  .buffer = ws_vk->buf,
715  .size = ws_vk->size,
716  .offset = 0,
717  };
718 
719  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
720  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
721  .pBufferMemoryBarriers = buf_bar,
722  .bufferMemoryBarrierCount = nb_buf_bar,
723  });
724  ws_vk->stage = buf_bar[0].dstStageMask;
725  ws_vk->access = buf_bar[0].dstAccessMask;
726 
727  /* End of denoise pass */
728  vk->CmdDispatch(exec->buf,
729  FFALIGN(vkctx->output_width, s->pl_denoise.wg_size[0])/s->pl_denoise.wg_size[0],
730  FFALIGN(vkctx->output_height, s->pl_denoise.wg_size[1])/s->pl_denoise.wg_size[1],
731  av_pix_fmt_count_planes(s->vkctx.output_format));
732 
733  return 0;
734 }
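/* Since init_denoise_pipeline() requests a 32x32 workgroup, the dispatch above
 * launches FFALIGN(w,32)/32 x FFALIGN(h,32)/32 x nb_planes groups; e.g. a
 * 1920x1080 frame yields 60 x 34 groups per plane (illustrative arithmetic,
 * not an original comment). */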
735 
736 static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
737 {
738  int err;
739  AVFrame *out = NULL;
740  AVFilterContext *ctx = link->dst;
741  NLMeansVulkanContext *s = ctx->priv;
742  AVFilterLink *outlink = ctx->outputs[0];
743  FFVulkanContext *vkctx = &s->vkctx;
744  FFVulkanFunctions *vk = &vkctx->vkfn;
745 
746  const AVPixFmtDescriptor *desc;
747  int plane_widths[4];
748  int plane_heights[4];
749 
750  int offsets_dispatched = 0;
751 
752  /* Integral */
753  AVBufferRef *integral_buf = NULL;
754  FFVkBuffer *integral_vk;
755  size_t int_stride;
756  size_t int_size;
757 
758  /* Weights/sums */
759  AVBufferRef *ws_buf = NULL;
760  FFVkBuffer *ws_vk;
761  VkDeviceSize weights_offs[4];
762  VkDeviceSize sums_offs[4];
763  uint32_t ws_stride[4];
764  size_t ws_size[4];
765  size_t ws_total_size = 0;
766 
767  FFVkExecContext *exec;
768  VkImageView in_views[AV_NUM_DATA_POINTERS];
769  VkImageView out_views[AV_NUM_DATA_POINTERS];
770  VkImageMemoryBarrier2 img_bar[8];
771  int nb_img_bar = 0;
772  VkBufferMemoryBarrier2 buf_bar[8];
773  int nb_buf_bar = 0;
774 
775  if (!s->initialized)
776  RET(init_filter(ctx));
777 
778  desc = av_pix_fmt_desc_get(vkctx->output_format);
779  if (!desc)
780  return AVERROR(EINVAL);
781 
782  /* Integral image */
783  int_stride = s->pl_weights.wg_size[0]*s->pl_weights_rows*TYPE_SIZE;
784  int_size = s->pl_weights.wg_size[0]*s->pl_weights_rows*int_stride;
785 
786  /* Plane dimensions */
787  for (int i = 0; i < desc->nb_components; i++) {
788  plane_widths[i] = !i || (i == 3) ? vkctx->output_width : AV_CEIL_RSHIFT(vkctx->output_width, desc->log2_chroma_w);
789  plane_heights[i] = !i || (i == 3) ? vkctx->output_height : AV_CEIL_RSHIFT(vkctx->output_height, desc->log2_chroma_h);
790  plane_widths[i] = FFALIGN(plane_widths[i], s->pl_denoise.wg_size[0]);
791  plane_heights[i] = FFALIGN(plane_heights[i], s->pl_denoise.wg_size[1]);
792 
793  ws_stride[i] = plane_widths[i];
794  ws_size[i] = ws_stride[i] * plane_heights[i] * sizeof(float);
795  ws_total_size += ws_size[i];
796  }
797 
798  /* Buffers */
799  err = ff_vk_get_pooled_buffer(&s->vkctx, &s->integral_buf_pool, &integral_buf,
800  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
801  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
802  NULL,
803  s->opts.t * int_size,
804  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
805  if (err < 0)
806  return err;
807  integral_vk = (FFVkBuffer *)integral_buf->data;
808 
809  err = ff_vk_get_pooled_buffer(&s->vkctx, &s->ws_buf_pool, &ws_buf,
810  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
811  VK_BUFFER_USAGE_TRANSFER_DST_BIT |
812  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
813  NULL,
814  ws_total_size * 2,
815  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
816  if (err < 0)
817  return err;
818  ws_vk = (FFVkBuffer *)ws_buf->data;
819 
820  weights_offs[0] = 0;
821  sums_offs[0] = ws_total_size;
822  for (int i = 1; i < desc->nb_components; i++) {
823  weights_offs[i] = weights_offs[i - 1] + ws_size[i - 1];
824  sums_offs[i] = sums_offs[i - 1] + ws_size[i - 1];
825  }
826 
827  /* Output frame */
828  out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
829  if (!out) {
830  err = AVERROR(ENOMEM);
831  goto fail;
832  }
833 
834  /* Execution context */
835  exec = ff_vk_exec_get(&s->e);
836  ff_vk_exec_start(vkctx, exec);
837 
838  /* Dependencies */
839  RET(ff_vk_exec_add_dep_frame(vkctx, exec, in,
840  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
841  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
842  RET(ff_vk_exec_add_dep_frame(vkctx, exec, out,
843  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
844  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
845 
846  RET(ff_vk_exec_add_dep_buf(vkctx, exec, &integral_buf, 1, 0));
847  integral_buf = NULL;
848 
849  RET(ff_vk_exec_add_dep_buf(vkctx, exec, &ws_buf, 1, 0));
850  ws_buf = NULL;
851 
852  /* Input frame prep */
853  RET(ff_vk_create_imageviews(vkctx, exec, in_views, in));
854  ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar,
855  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
856  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
857  VK_ACCESS_SHADER_READ_BIT,
858  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
859  VK_QUEUE_FAMILY_IGNORED);
860 
861  /* Output frame prep */
862  RET(ff_vk_create_imageviews(vkctx, exec, out_views, out));
863  ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar,
864  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
865  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
866  VK_ACCESS_SHADER_WRITE_BIT,
867  VK_IMAGE_LAYOUT_GENERAL,
868  VK_QUEUE_FAMILY_IGNORED);
869 
870  nb_buf_bar = 0;
871  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
872  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
873  .srcStageMask = ws_vk->stage,
874  .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
875  .srcAccessMask = ws_vk->access,
876  .dstAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
877  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
878  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
879  .buffer = ws_vk->buf,
880  .size = ws_vk->size,
881  .offset = 0,
882  };
883  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
884  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
885  .srcStageMask = integral_vk->stage,
886  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
887  .srcAccessMask = integral_vk->access,
888  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
889  VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
890  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
891  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
892  .buffer = integral_vk->buf,
893  .size = integral_vk->size,
894  .offset = 0,
895  };
896 
897  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
898  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
899  .pImageMemoryBarriers = img_bar,
900  .imageMemoryBarrierCount = nb_img_bar,
901  .pBufferMemoryBarriers = buf_bar,
902  .bufferMemoryBarrierCount = nb_buf_bar,
903  });
904  ws_vk->stage = buf_bar[0].dstStageMask;
905  ws_vk->access = buf_bar[0].dstAccessMask;
906  integral_vk->stage = buf_bar[1].dstStageMask;
907  integral_vk->access = buf_bar[1].dstAccessMask;
908 
909  /* Buffer zeroing */
910  vk->CmdFillBuffer(exec->buf, ws_vk->buf, 0, ws_vk->size, 0x0);
911 
912  nb_buf_bar = 0;
913  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
914  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
915  .srcStageMask = ws_vk->stage,
916  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
917  .srcAccessMask = ws_vk->access,
918  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
919  VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
920  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
921  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
922  .buffer = ws_vk->buf,
923  .size = ws_vk->size,
924  .offset = 0,
925  };
926 
927  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
928  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
929  .pBufferMemoryBarriers = buf_bar,
930  .bufferMemoryBarrierCount = nb_buf_bar,
931  });
932  ws_vk->stage = buf_bar[0].dstStageMask;
933  ws_vk->access = buf_bar[0].dstAccessMask;
934 
935  /* Update weights descriptors */
936  ff_vk_update_descriptor_img_array(vkctx, &s->pl_weights, exec, in, in_views, 0, 0,
937  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
938  s->sampler);
939  for (int i = 0; i < desc->nb_components; i++) {
940  RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_weights, exec, 0, 1 + i*2 + 0, 0,
941  ws_vk, weights_offs[i], ws_size[i],
942  VK_FORMAT_UNDEFINED));
943  RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_weights, exec, 0, 1 + i*2 + 1, 0,
944  ws_vk, sums_offs[i], ws_size[i],
945  VK_FORMAT_UNDEFINED));
946  }
947 
948  /* Update denoise descriptors */
949  ff_vk_update_descriptor_img_array(vkctx, &s->pl_denoise, exec, in, in_views, 0, 0,
950  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
951  s->sampler);
952  ff_vk_update_descriptor_img_array(vkctx, &s->pl_denoise, exec, out, out_views, 0, 1,
953  VK_IMAGE_LAYOUT_GENERAL, s->sampler);
954  for (int i = 0; i < desc->nb_components; i++) {
955  RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_denoise, exec, 1, i*2 + 0, 0,
956  ws_vk, weights_offs[i], ws_size[i],
957  VK_FORMAT_UNDEFINED));
958  RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_denoise, exec, 1, i*2 + 1, 0,
959  ws_vk, sums_offs[i], ws_size[i],
960  VK_FORMAT_UNDEFINED));
961  }
962 
963  /* Weights pipeline */
964  ff_vk_exec_bind_pipeline(vkctx, exec, &s->pl_weights);
965 
966  do {
967  int wg_invoc;
968  HorizontalPushData pd = {
969  { plane_widths[0], plane_widths[1], plane_widths[2], plane_widths[3] },
970  { plane_heights[0], plane_heights[1], plane_heights[2], plane_heights[3] },
971  { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] },
972  { s->patch[0], s->patch[1], s->patch[2], s->patch[3] },
973  { s->strength[0], s->strength[1], s->strength[2], s->strength[3], },
974  integral_vk->address,
975  (uint64_t)int_size,
976  (uint64_t)int_stride,
977  offsets_dispatched,
978  };
979 
980  /* Push data */
981  ff_vk_update_push_exec(vkctx, exec, &s->pl_weights, VK_SHADER_STAGE_COMPUTE_BIT,
982  0, sizeof(pd), &pd);
983 
984  if (offsets_dispatched) {
985  nb_buf_bar = 0;
986  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
987  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
988  .srcStageMask = integral_vk->stage,
989  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
990  .srcAccessMask = integral_vk->access,
991  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
992  VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
993  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
994  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
995  .buffer = integral_vk->buf,
996  .size = integral_vk->size,
997  .offset = 0,
998  };
999 
1000  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
1001  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
1002  .pBufferMemoryBarriers = buf_bar,
1003  .bufferMemoryBarrierCount = nb_buf_bar,
1004  });
1005  integral_vk->stage = buf_bar[0].dstStageMask;
1006  integral_vk->access = buf_bar[0].dstAccessMask;
1007  }
1008 
1009  wg_invoc = FFMIN((s->nb_offsets - offsets_dispatched)/TYPE_ELEMS, s->opts.t);
1010  wg_invoc = FFMIN(wg_invoc, vkctx->props.properties.limits.maxComputeWorkGroupCount[2]);
1011 
1012  /* End of horizontal pass */
1013  vk->CmdDispatch(exec->buf, 1, 1, wg_invoc);
1014 
1015  offsets_dispatched += wg_invoc * TYPE_ELEMS;
1016  } while (offsets_dispatched < s->nb_offsets);
1017 
1018  RET(denoise_pass(s, exec, ws_vk, ws_stride));
1019 
1020  err = ff_vk_exec_submit(vkctx, exec);
1021  if (err < 0)
1022  return err;
1023 
1024  err = av_frame_copy_props(out, in);
1025  if (err < 0)
1026  goto fail;
1027 
1028  av_frame_free(&in);
1029 
1030  return ff_filter_frame(outlink, out);
1031 
1032 fail:
1033  av_buffer_unref(&integral_buf);
1034  av_buffer_unref(&ws_buf);
1035  av_frame_free(&in);
1036  av_frame_free(&out);
1037  return err;
1038 }
1039 
1040 static void nlmeans_vulkan_uninit(AVFilterContext *avctx)
1041 {
1042  NLMeansVulkanContext *s = avctx->priv;
1043  FFVulkanContext *vkctx = &s->vkctx;
1044  FFVulkanFunctions *vk = &vkctx->vkfn;
1045 
1046  ff_vk_exec_pool_free(vkctx, &s->e);
1047  ff_vk_pipeline_free(vkctx, &s->pl_weights);
1048  ff_vk_shader_free(vkctx, &s->shd_weights);
1049  ff_vk_pipeline_free(vkctx, &s->pl_denoise);
1050  ff_vk_shader_free(vkctx, &s->shd_denoise);
1051 
1052  av_buffer_pool_uninit(&s->integral_buf_pool);
1053  av_buffer_pool_uninit(&s->ws_buf_pool);
1054 
1055  if (s->sampler)
1056  vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
1057  vkctx->hwctx->alloc);
1058 
1059  ff_vk_uninit(&s->vkctx);
1060 
1061  av_freep(&s->xoffsets);
1062  av_freep(&s->yoffsets);
1063 
1064  s->initialized = 0;
1065 }
1066 
1067 #define OFFSET(x) offsetof(NLMeansVulkanContext, x)
1068 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
1069 static const AVOption nlmeans_vulkan_options[] = {
1070  { "s", "denoising strength for all components", OFFSET(opts.s), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
1071  { "p", "patch size for all components", OFFSET(opts.p), AV_OPT_TYPE_INT, { .i64 = 3*2+1 }, 0, 99, FLAGS },
1072  { "r", "research window radius", OFFSET(opts.r), AV_OPT_TYPE_INT, { .i64 = 7*2+1 }, 0, 99, FLAGS },
1073  { "t", "parallelism", OFFSET(opts.t), AV_OPT_TYPE_INT, { .i64 = 36 }, 1, 168, FLAGS },
1074 
1075  { "s1", "denoising strength for component 1", OFFSET(opts.sc[0]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
1076  { "s2", "denoising strength for component 2", OFFSET(opts.sc[1]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
1077  { "s3", "denoising strength for component 3", OFFSET(opts.sc[2]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
1078  { "s4", "denoising strength for component 4", OFFSET(opts.sc[3]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
1079 
1080  { "p1", "patch size for component 1", OFFSET(opts.pc[0]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
1081  { "p2", "patch size for component 2", OFFSET(opts.pc[1]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
1082  { "p3", "patch size for component 3", OFFSET(opts.pc[2]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
1083  { "p4", "patch size for component 4", OFFSET(opts.pc[3]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
1084 
1085  { NULL }
1086 };
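/* Illustrative usage (not from the original source; assumes a Vulkan device
 * has been initialized and is available to the filter graph):
 *   ffmpeg -init_hw_device vulkan -i in.mkv \
 *          -vf hwupload,nlmeans_vulkan=s=2:p=7:r=15,hwdownload out.mkv
 * The per-component options s1..s4 and p1..p4 override s and p for individual
 * components. */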
1087 
1088 AVFILTER_DEFINE_CLASS(nlmeans_vulkan);
1089 
1090 static const AVFilterPad nlmeans_vulkan_inputs[] = {
1091  {
1092  .name = "default",
1093  .type = AVMEDIA_TYPE_VIDEO,
1094  .filter_frame = &nlmeans_vulkan_filter_frame,
1095  .config_props = &ff_vk_filter_config_input,
1096  },
1097 };
1098 
1099 static const AVFilterPad nlmeans_vulkan_outputs[] = {
1100  {
1101  .name = "default",
1102  .type = AVMEDIA_TYPE_VIDEO,
1103  .config_props = &ff_vk_filter_config_output,
1104  },
1105 };
1106 
1107 const AVFilter ff_vf_nlmeans_vulkan = {
1108  .name = "nlmeans_vulkan",
1109  .description = NULL_IF_CONFIG_SMALL("Non-local means denoiser (Vulkan)"),
1110  .priv_size = sizeof(NLMeansVulkanContext),
1111  .init = &ff_vk_filter_init,
1112  .uninit = &nlmeans_vulkan_uninit,
1113  FILTER_INPUTS(nlmeans_vulkan_inputs),
1114  FILTER_OUTPUTS(nlmeans_vulkan_outputs),
1115  FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN),
1116  .priv_class = &nlmeans_vulkan_class,
1117  .flags = AVFILTER_FLAG_HWDEVICE,
1118  .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
1119 };