FFmpeg
vf_nlmeans_vulkan.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) Lynne
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/mem.h"
22 #include "libavutil/random_seed.h"
23 #include "libavutil/vulkan_spirv.h"
24 #include "libavutil/opt.h"
25 #include "vulkan_filter.h"
26 
27 #include "filters.h"
28 #include "video.h"
29 
/* GLSL type of one integral-buffer element; emitted into the shaders as DTYPE. */
30 #define TYPE_NAME "vec4"
/* Number of lanes per element; the shaders handle this many x/y offsets per invocation. */
31 #define TYPE_ELEMS 4
/* Size of one element (TYPE_ELEMS * 4, presumably bytes of 32-bit lanes); emitted as T_ALIGN. */
32 #define TYPE_SIZE (TYPE_ELEMS*4)
/* Number of elements in one GLSL "Block" as read/written by the horizontal pass. */
33 #define TYPE_BLOCK_ELEMS 16
/* Size of one Block; emitted as T_BLOCK_ALIGN. */
34 #define TYPE_BLOCK_SIZE (TYPE_SIZE * TYPE_BLOCK_ELEMS)
/* Local workgroup size passed to ff_vk_shader_init() (x only, or x and y). */
35 #define WG_SIZE 32
36 
/*
 * Private filter state.
 * NOTE(review): several members (and the closing line of the struct) are not
 * visible in this listing; only the visible members are documented.
 */
37 typedef struct NLMeansVulkanContext {
39 
43 
46 
48 
53 
/* Research-window offsets (x and y), filled by init_filter(); (0,0) is excluded. */
54  int *xoffsets;
55  int *yoffsets;
/* Per-component factor: 0 disables a component, otherwise 255*255 / -(10*strength)^2. */
57  float strength[4];
/* Per-component patch size divided by two (integer division), set by init_filter(). */
58  int patch[4];
59 
/* Raw option values. */
60  struct nlmeans_opts {
61  int r;          /* research size; forced odd by init_filter() */
62  double s;       /* strength used when sc[i] is NaN */
63  double sc[4];   /* per-component strength; NaN selects s */
64  int p;          /* patch size used when pc[i] is 0; forced odd */
65  int pc[4];      /* per-component patch size; 0 selects p */
66  int t;          /* upper bound on offset groups per dispatch; clamped in init_filter() */
67  } opts;
69 
/*
 * Push-constant data for the horizontal and vertical integral shaders.
 * The member order matches the GLSL pushConstants block written by
 * shared_shd_def().
 */
70 typedef struct IntegralPushData {
71  uint32_t width[4];        /* per-index width bound, indexed in the shaders by c_plane */
72  uint32_t height[4];       /* per-index height bound, indexed in the shaders by c_plane */
73  float strength[4];        /* per-component strength; 0.0 makes the shaders return early */
74  uint32_t comp_off[4];     /* per-component channel index inside a texel */
75  uint32_t comp_plane[4];   /* per-component plane index */
76  VkDeviceAddress integral_base; /* device address of the integral buffer */
77  uint64_t integral_size;   /* size of one integral image (per invocation and component) */
78  uint64_t int_stride;      /* distance between consecutive rows of an integral image */
79  uint32_t xyoffs_start;    /* first entry of xyoffsets[] used by this dispatch */
80  uint32_t nb_components;   /* number of components processed */
82 
/*
 * Append the GLSL preamble common to both integral shaders to shd:
 * the int64 extension, the DTYPE/alignment defines, the DataBuffer and
 * BlockBuffer buffer-reference types and the push-constant block that
 * mirrors IntegralPushData.
 */
83 static void shared_shd_def(FFVulkanShader *shd) {
84  GLSLC(0, #extension GL_ARB_gpu_shader_int64 : require );
85  GLSLC(0, );
86  GLSLF(0, #define DTYPE %s ,TYPE_NAME);
87  GLSLF(0, #define T_ALIGN %i ,TYPE_SIZE);
88  GLSLF(0, #define T_BLOCK_ELEMS %i ,TYPE_BLOCK_ELEMS);
89  GLSLF(0, #define T_BLOCK_ALIGN %i ,TYPE_BLOCK_SIZE);
90  GLSLC(0, );
/* Element-wise view of the integral buffer. */
91  GLSLC(0, layout(buffer_reference, buffer_reference_align = T_ALIGN) buffer DataBuffer { );
92  GLSLC(1, DTYPE v[]; );
93  GLSLC(0, }; );
/* Block-wise view of the same memory, T_BLOCK_ELEMS elements at a time. */
94  GLSLC(0, struct Block { );
95  GLSLC(1, DTYPE data[T_BLOCK_ELEMS]; );
96  GLSLC(0, }; );
97  GLSLC(0, layout(buffer_reference, buffer_reference_align = T_BLOCK_ALIGN) buffer BlockBuffer { );
98  GLSLC(1, Block v[]; );
99  GLSLC(0, }; );
/* Must stay in the same order as the members of IntegralPushData. */
100  GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
101  GLSLC(1, uvec4 width; );
102  GLSLC(1, uvec4 height; );
103  GLSLC(1, vec4 strength; );
104  GLSLC(1, uvec4 comp_off; );
105  GLSLC(1, uvec4 comp_plane; );
106  GLSLC(1, DataBuffer integral_base; );
107  GLSLC(1, uint64_t integral_size; );
108  GLSLC(1, uint64_t int_stride; );
109  GLSLC(1, uint xyoffs_start; );
110  GLSLC(1, uint nb_components; );
111  GLSLC(0, }; );
112  GLSLC(0, );
113 
/* NOTE(review): tail of a call whose first line is not visible in this listing;
 * presumably it registers the push-constant range for the compute stage. */
115  VK_SHADER_STAGE_COMPUTE_BIT);
116 }
117 
119  FFVulkanShader *shd_horizontal,
120  FFVulkanShader *shd_vertical,
121  FFVkSPIRVCompiler *spv,
122  const AVPixFmtDescriptor *desc, int planes)
123 {
/*
 * Build, compile, link and register the two compute shaders that fill the
 * integral buffer:
 *  - "nlmeans_vertical": one invocation per column; for every row it writes
 *    the running (top-to-bottom) sum of squared differences between the
 *    pixel and its TYPE_ELEMS offset neighbours.
 *  - "nlmeans_horizontal": one invocation per row; it turns that row into a
 *    left-to-right prefix sum, in place.
 * Returns the last error code set by RET(); the compiler-side shader object
 * is released on every exit path through the fail label.
 */
124  int err;
125  uint8_t *spv_data;
126  size_t spv_len;
127  void *spv_opaque = NULL;
128  FFVulkanShader *shd;
130 
/* ---- Horizontal pass shader ---- */
131  shd = shd_horizontal;
132  RET(ff_vk_shader_init(vkctx, shd, "nlmeans_horizontal",
133  VK_SHADER_STAGE_COMPUTE_BIT,
134  (const char *[]) { "GL_EXT_buffer_reference",
135  "GL_EXT_buffer_reference2" }, 2,
136  WG_SIZE, 1, 1,
137  0));
138  shared_shd_def(shd);
139 
140  GLSLC(0, );
141  GLSLC(0, void main() );
142  GLSLC(0, { );
143  GLSLC(1, uint64_t offset; );
144  GLSLC(1, DataBuffer dst; );
145  GLSLC(1, BlockBuffer b_dst; );
146  GLSLC(1, Block block; );
147  GLSLC(1, DTYPE s2; );
148  GLSLC(1, DTYPE prefix_sum; );
149  GLSLC(1, ivec2 pos; );
150  GLSLC(1, int k; );
151  GLSLC(1, int o; );
152  GLSLC(0, );
153  GLSLC(1, DataBuffer integral_data; );
154  GLSLC(0, );
155  GLSLC(1, uint c_plane; );
156  GLSLC(0, );
/* Workgroup y selects the component, workgroup z the offset group. */
157  GLSLC(1, uint comp_idx = uint(gl_WorkGroupID.y); );
158  GLSLC(1, uint invoc_idx = uint(gl_WorkGroupID.z); );
159  GLSLC(0, );
/* Components with zero strength are skipped entirely. */
160  GLSLC(1, if (strength[comp_idx] == 0.0) );
161  GLSLC(2, return; );
162  GLSLC(0, );
/* Each (offset group, component) pair owns its own integral image. */
163  GLSLC(1, offset = integral_size * (invoc_idx * nb_components + comp_idx); );
164  GLSLC(1, integral_data = DataBuffer(uint64_t(integral_base) + offset); );
165  GLSLC(0, );
166  GLSLC(1, c_plane = comp_plane[comp_idx]; );
167  GLSLC(0, );
/* The global x invocation index selects the row. */
168  GLSLC(1, pos.y = int(gl_GlobalInvocationID.x); );
169  GLSLC(1, if (pos.y < height[c_plane]) { );
170  GLSLC(2, prefix_sum = DTYPE(0); );
171  GLSLC(2, offset = int_stride * uint64_t(pos.y); );
172  GLSLC(2, b_dst = BlockBuffer(uint64_t(integral_data) + offset); );
173  GLSLC(0, );
/* Walk the row one Block at a time, accumulating the running sum. */
174  GLSLC(2, for (k = 0; k * T_BLOCK_ELEMS < width[c_plane]; k++) { );
175  GLSLC(3, block = b_dst.v[k]; );
176  GLSLC(3, for (o = 0; o < T_BLOCK_ELEMS; o++) { );
177  GLSLC(4, s2 = block.data[o]; );
178  GLSLC(4, block.data[o] = s2 + prefix_sum; );
179  GLSLC(4, prefix_sum += s2; );
180  GLSLC(3, } );
181  GLSLC(3, b_dst.v[k] = block; );
182  GLSLC(2, } );
183  GLSLC(1, } );
184  GLSLC(0, } );
185 
186  RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", &spv_opaque));
187  RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main"));
188 
189  RET(ff_vk_shader_register_exec(vkctx, exec, shd));
190 
/* ---- Vertical pass shader ---- */
191  shd = shd_vertical;
192  RET(ff_vk_shader_init(vkctx, shd, "nlmeans_vertical",
193  VK_SHADER_STAGE_COMPUTE_BIT,
194  (const char *[]) { "GL_EXT_buffer_reference",
195  "GL_EXT_buffer_reference2" }, 2,
196  WG_SIZE, 1, 1,
197  0));
198  shared_shd_def(shd);
199 
/* Descriptor set 0: the input planes, read-only. */
200  desc_set = (FFVulkanDescriptorSetBinding []) {
201  {
202  .name = "input_img",
203  .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
204  .mem_layout = ff_vk_shader_rep_fmt(vkctx->input_format, FF_VK_REP_FLOAT),
205  .mem_quali = "readonly",
206  .dimensions = 2,
207  .elems = planes,
208  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
209  },
210  };
211  RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 1, 0, 0));
212 
/* Descriptor set 1: the x/y offset table, read-only. */
213  desc_set = (FFVulkanDescriptorSetBinding []) {
214  {
215  .name = "xyoffsets_buffer",
216  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
217  .mem_quali = "readonly",
218  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
219  .buf_content = "ivec2 xyoffsets[];",
220  },
221  };
222  RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 1, 1, 0));
223 
224  GLSLC(0, );
225  GLSLC(0, void main() );
226  GLSLC(0, { );
227  GLSLC(1, uint64_t offset; );
228  GLSLC(1, DataBuffer dst; );
229  GLSLC(1, float s1; );
230  GLSLC(1, DTYPE s2; );
231  GLSLC(1, DTYPE prefix_sum; );
232  GLSLC(1, uvec2 size; );
233  GLSLC(1, ivec2 pos; );
234  GLSLC(1, ivec2 pos_off; );
235  GLSLC(0, );
236  GLSLC(1, DataBuffer integral_data; );
237  GLSLF(1, ivec2 offs[%i]; ,TYPE_ELEMS);
238  GLSLC(0, );
239  GLSLC(1, uint c_off; );
240  GLSLC(1, uint c_plane; );
241  GLSLC(0, );
242  GLSLC(1, uint comp_idx = uint(gl_WorkGroupID.y); );
243  GLSLC(1, uint invoc_idx = uint(gl_WorkGroupID.z); );
244  GLSLC(0, );
245  GLSLC(1, if (strength[comp_idx] == 0.0) );
246  GLSLC(2, return; );
247  GLSLC(0, );
248  GLSLC(1, offset = integral_size * (invoc_idx * nb_components + comp_idx); );
249  GLSLC(1, integral_data = DataBuffer(uint64_t(integral_base) + offset); );
/* Unrolled at generation time: fetch the TYPE_ELEMS offsets of this group. */
250  for (int i = 0; i < TYPE_ELEMS; i++)
251  GLSLF(1, offs[%i] = xyoffsets[xyoffs_start + %i*invoc_idx + %i]; ,i,TYPE_ELEMS,i);
252  GLSLC(0, );
253  GLSLC(1, c_off = comp_off[comp_idx]; );
254  GLSLC(1, c_plane = comp_plane[comp_idx]; );
255  GLSLC(1, size = imageSize(input_img[c_plane]); );
256  GLSLC(0, );
/* The global x invocation index selects the column. */
257  GLSLC(1, pos.x = int(gl_GlobalInvocationID.x); );
258  GLSLC(1, if (pos.x < width[c_plane]) { );
259  GLSLC(2, prefix_sum = DTYPE(0); );
260  GLSLC(2, for (pos.y = 0; pos.y < height[c_plane]; pos.y++) { );
261  GLSLC(3, offset = int_stride * uint64_t(pos.y); );
262  GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset); );
263  GLSLC(4, s1 = imageLoad(input_img[c_plane], pos)[c_off]; );
/* Unrolled at generation time: an out-of-image neighbour falls back to the
 * centre pixel, which makes its squared difference zero. */
264  for (int i = 0; i < TYPE_ELEMS; i++) {
265  GLSLF(4, pos_off = pos + offs[%i]; ,i);
266  GLSLC(4, if (!IS_WITHIN(uvec2(pos_off), size)) );
267  GLSLF(5, s2[%i] = s1; ,i);
268  GLSLC(4, else );
269  GLSLF(5, s2[%i] = imageLoad(input_img[c_plane], pos_off)[c_off]; ,i);
270  }
271  GLSLC(4, s2 = (s1 - s2) * (s1 - s2); );
272  GLSLC(3, dst.v[pos.x] = s2 + prefix_sum; );
273  GLSLC(3, prefix_sum += s2; );
274  GLSLC(2, } );
275  GLSLC(1, } );
276  GLSLC(0, } );
277 
278  RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", &spv_opaque));
279  RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main"));
280 
281  RET(ff_vk_shader_register_exec(vkctx, exec, shd));
282 
/* Reached on success and on failure; err holds the result of the last RET(). */
283 fail:
284  if (spv_opaque)
285  spv->free_shader(spv, &spv_opaque);
286 
287  return err;
288 }
289 
/*
 * Push-constant data for the "nlmeans_weights" shader; the member order has
 * to match the GLSL pushConstants block written by init_weights_pipeline().
 * NOTE(review): the GLSL block declares "ivec4 patch_size" between ws_stride
 * and strength; the matching C member is not visible in this listing -
 * confirm it is present in the real file.
 */
290 typedef struct WeightsPushData {
291  uint32_t width[4];        /* per-index width bound, indexed in the shader by c_plane */
292  uint32_t height[4];       /* per-index height bound, indexed in the shader by c_plane */
293  uint32_t ws_offset[4];    /* per-component start inside the weights/sums arrays */
294  uint32_t ws_stride[4];    /* per-component row stride inside the weights/sums arrays */
296  float strength[4];        /* per-component strength; 0.0 makes the shader return early */
297  uint32_t comp_off[4];     /* per-component channel index inside a texel */
298  uint32_t comp_plane[4];   /* per-component plane index */
299  VkDeviceAddress integral_base; /* device address of the integral buffer */
300  uint64_t integral_size;   /* size of one integral image */
301  uint64_t int_stride;      /* distance between consecutive rows of an integral image */
302  uint32_t xyoffs_start;    /* first entry of xyoffsets[] used by this dispatch */
303  uint32_t ws_count;        /* number of weights/sums entries per offset group */
304  uint32_t nb_components;   /* number of components processed */
306 
308  FFVulkanShader *shd,
309  FFVkSPIRVCompiler *spv,
310  const AVPixFmtDescriptor *desc,
311  int planes)
312 {
/*
 * Build, compile, link and register the "nlmeans_weights" compute shader.
 * One invocation handles one pixel of one component for one offset group:
 * it reads four corners of the integral image to get the patch difference
 * for each of the TYPE_ELEMS offsets, converts them to weights with exp()
 * and accumulates the weight total and the weighted pixel total into the
 * weights[] and sums[] buffers.
 * Returns the last error code set by RET(); the compiler-side shader object
 * is released on every exit path through the fail label.
 */
313  int err;
314  uint8_t *spv_data;
315  size_t spv_len;
316  void *spv_opaque = NULL;
318 
319  RET(ff_vk_shader_init(vkctx, shd, "nlmeans_weights",
320  VK_SHADER_STAGE_COMPUTE_BIT,
321  (const char *[]) { "GL_EXT_buffer_reference",
322  "GL_EXT_buffer_reference2" }, 2,
323  WG_SIZE, WG_SIZE, 1,
324  0));
325 
326  GLSLC(0, #extension GL_ARB_gpu_shader_int64 : require );
327  GLSLC(0, );
328  GLSLF(0, #define DTYPE %s ,TYPE_NAME);
329  GLSLF(0, #define T_ALIGN %i ,TYPE_SIZE);
330  GLSLC(0, );
331  GLSLC(0, layout(buffer_reference, buffer_reference_align = T_ALIGN) buffer DataBuffer { );
332  GLSLC(1, DTYPE v[]; );
333  GLSLC(0, }; );
/* Must stay in the same order as the members of WeightsPushData. */
334  GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
335  GLSLC(1, uvec4 width; );
336  GLSLC(1, uvec4 height; );
337  GLSLC(1, uvec4 ws_offset; );
338  GLSLC(1, uvec4 ws_stride; );
339  GLSLC(1, ivec4 patch_size; );
340  GLSLC(1, vec4 strength; );
341  GLSLC(1, uvec4 comp_off; );
342  GLSLC(1, uvec4 comp_plane; );
343  GLSLC(1, DataBuffer integral_base; );
344  GLSLC(1, uint64_t integral_size; );
345  GLSLC(1, uint64_t int_stride; );
346  GLSLC(1, uint xyoffs_start; );
347  GLSLC(1, uint ws_count; );
348  GLSLC(1, uint nb_components; );
349  GLSLC(0, }; );
350  GLSLC(0, );
351 
/* NOTE(review): tail of a call whose first line is not visible in this listing;
 * presumably it registers the push-constant range for the compute stage. */
353  VK_SHADER_STAGE_COMPUTE_BIT);
354 
/* Descriptor set 0: input planes plus the weights and sums accumulators. */
355  desc_set = (FFVulkanDescriptorSetBinding []) {
356  {
357  .name = "input_img",
358  .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
359  .mem_layout = ff_vk_shader_rep_fmt(vkctx->input_format, FF_VK_REP_FLOAT),
360  .mem_quali = "readonly",
361  .dimensions = 2,
362  .elems = planes,
363  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
364  },
365  {
366  .name = "weights_buffer",
367  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
368  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
369  .buf_content = "float weights[];",
370  },
371  {
372  .name = "sums_buffer",
373  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
374  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
375  .buf_content = "float sums[];",
376  },
377  };
378  RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 3, 0, 0));
379 
/* Descriptor set 1: the x/y offset table, read-only. */
380  desc_set = (FFVulkanDescriptorSetBinding []) {
381  {
382  .name = "xyoffsets_buffer",
383  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
384  .mem_quali = "readonly",
385  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
386  .buf_content = "ivec2 xyoffsets[];",
387  },
388  };
389  RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 1, 1, 0));
390 
391  GLSLC(0, );
392  GLSLC(0, void main() );
393  GLSLC(0, { );
394  GLSLC(1, uint64_t offset; );
395  GLSLC(1, DataBuffer dst; );
396  GLSLC(1, uvec2 size; );
397  GLSLC(1, ivec2 pos; );
398  GLSLC(1, ivec2 pos_off; );
399  GLSLC(1, int p; );
400  GLSLC(1, float s; );
401  GLSLC(0, );
402  GLSLC(1, DataBuffer integral_data; );
403  GLSLF(1, ivec2 offs[%i]; ,TYPE_ELEMS);
404  GLSLC(0, );
405  GLSLC(1, uint c_off; );
406  GLSLC(1, uint c_plane; );
407  GLSLC(1, uint ws_off; );
408  GLSLC(0, );
409  GLSLC(1, pos = ivec2(gl_GlobalInvocationID.xy); );
/* Workgroup z packs both indices: component = z % nb_components, offset group = z / nb_components. */
410  GLSLC(1, uint comp_idx = uint(gl_WorkGroupID.z) %% nb_components; );
411  GLSLC(1, uint invoc_idx = uint(gl_WorkGroupID.z) / nb_components; );
412  GLSLC(0, );
413  GLSLC(1, c_off = comp_off[comp_idx]; );
414  GLSLC(1, c_plane = comp_plane[comp_idx]; );
415  GLSLC(1, p = patch_size[comp_idx]; );
416  GLSLC(1, s = strength[comp_idx]; );
/* Skip disabled components and pixels closer than p to any border, where the
 * patch corners below would fall outside the integral image. */
417  GLSLC(1, if (s == 0.0 || pos.x < p || pos.y < p || pos.x >= width[c_plane] - p || pos.y >= height[c_plane] - p) );
418  GLSLC(2, return; );
419  GLSLC(0, );
420  GLSLC(1, offset = integral_size * (invoc_idx * nb_components + comp_idx); );
421  GLSLC(1, integral_data = DataBuffer(uint64_t(integral_base) + offset); );
/* Unrolled at generation time: fetch the TYPE_ELEMS offsets of this group. */
422  for (int i = 0; i < TYPE_ELEMS; i++)
423  GLSLF(1, offs[%i] = xyoffsets[xyoffs_start + %i*invoc_idx + %i]; ,i,TYPE_ELEMS,i);
424  GLSLC(0, );
/* Each offset group accumulates into its own ws_count-sized slice. */
425  GLSLC(1, ws_off = ws_count * invoc_idx + ws_offset[comp_idx] + pos.y * ws_stride[comp_idx] + pos.x; );
426  GLSLC(1, size = imageSize(input_img[c_plane]); );
427  GLSLC(0, );
428  GLSLC(1, DTYPE a; );
429  GLSLC(1, DTYPE b; );
430  GLSLC(1, DTYPE c; );
431  GLSLC(1, DTYPE d; );
432  GLSLC(0, );
433  GLSLC(1, DTYPE patch_diff; );
434  GLSLC(1, vec4 src; );
435  GLSLC(1, vec4 w; );
436  GLSLC(1, float w_sum; );
437  GLSLC(1, float sum; );
438  GLSLC(0, );
/* Unrolled at generation time: load the neighbour pixel for each offset,
 * falling back to the centre pixel when the neighbour is outside the image. */
439  for (int i = 0; i < 4; i++) {
440  GLSLF(1, pos_off = pos + offs[%i]; ,i);
441  GLSLC(1, if (!IS_WITHIN(uvec2(pos_off), size)) );
442  GLSLF(2, src[%i] = imageLoad(input_img[c_plane], pos)[c_off]; ,i);
443  GLSLC(1, else );
444  GLSLF(2, src[%i] = imageLoad(input_img[c_plane], pos_off)[c_off]; ,i);
445  }
446  GLSLC(0, );
/* Four corners of the patch in the integral image: a/c on row y-p, b/d on row y+p. */
447  GLSLC(1, offset = int_stride * uint64_t(pos.y - p); );
448  GLSLC(1, dst = DataBuffer(uint64_t(integral_data) + offset); );
449  GLSLC(1, a = dst.v[pos.x - p]; );
450  GLSLC(1, c = dst.v[pos.x + p]; );
451  GLSLC(1, offset = int_stride * uint64_t(pos.y + p); );
452  GLSLC(1, dst = DataBuffer(uint64_t(integral_data) + offset); );
453  GLSLC(1, b = dst.v[pos.x - p]; );
454  GLSLC(1, d = dst.v[pos.x + p]; );
455  GLSLC(0, );
/* Box sum over the patch, then one weight per offset lane. */
456  GLSLC(1, patch_diff = d + a - b - c; );
457  GLSLC(1, w = exp(patch_diff * s); );
458  GLSLC(1, w_sum = w[0] + w[1] + w[2] + w[3]; );
459  GLSLC(1, sum = dot(w, src * 255); );
460  GLSLC(0, );
461  GLSLC(1, weights[ws_off] += w_sum; );
462  GLSLC(1, sums[ws_off] += sum; );
463  GLSLC(0, } );
464 
465  RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", &spv_opaque));
466  RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main"));
467 
468  RET(ff_vk_shader_register_exec(vkctx, exec, shd));
469 
/* Reached on success and on failure; err holds the result of the last RET(). */
470 fail:
471  if (spv_opaque)
472  spv->free_shader(spv, &spv_opaque);
473 
474  return err;
475 }
476 
/*
 * Push-constant data for the "nlmeans_denoise" shader; the member order
 * matches the GLSL pushConstants block written by init_denoise_pipeline().
 */
477 typedef struct DenoisePushData {
478  uint32_t comp_off[4];     /* per-component channel index inside a texel */
479  uint32_t comp_plane[4];   /* per-component plane index */
480  uint32_t ws_offset[4];    /* per-component start inside the weights/sums arrays */
481  uint32_t ws_stride[4];    /* per-component row stride inside the weights/sums arrays */
482  uint32_t ws_count;        /* number of weights/sums entries per offset group */
483  uint32_t t;               /* number of offset-group slices summed by the shader */
484  uint32_t nb_components;   /* number of components looped over */
486 
489  const AVPixFmtDescriptor *desc, int planes)
490 {
/*
 * Build, compile, link and register the "nlmeans_denoise" compute shader.
 * One invocation handles one pixel of one plane: for every component stored
 * in that plane it adds up the t per-group weight and sum slices and writes
 * (sum + src * 255) / (1 + w_sum) / 255 to the output image.
 * Returns the last error code set by RET(); the compiler-side shader object
 * is released on every exit path through the fail label.
 */
491  int err;
492  uint8_t *spv_data;
493  size_t spv_len;
494  void *spv_opaque = NULL;
496  RET(ff_vk_shader_init(vkctx, shd, "nlmeans_denoise",
497  VK_SHADER_STAGE_COMPUTE_BIT,
498  (const char *[]) { "GL_EXT_buffer_reference",
499  "GL_EXT_buffer_reference2" }, 2,
500  WG_SIZE, WG_SIZE, 1,
501  0));
502 
/* Must stay in the same order as the members of DenoisePushData. */
503  GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
504  GLSLC(1, uvec4 comp_off; );
505  GLSLC(1, uvec4 comp_plane; );
506  GLSLC(1, uvec4 ws_offset; );
507  GLSLC(1, uvec4 ws_stride; );
508  GLSLC(1, uint32_t ws_count; );
509  GLSLC(1, uint32_t t; );
510  GLSLC(1, uint32_t nb_components; );
511  GLSLC(0, }; );
512 
/* NOTE(review): tail of a call whose first line is not visible in this listing;
 * presumably it registers the push-constant range for the compute stage. */
514  VK_SHADER_STAGE_COMPUTE_BIT);
515 
/* First descriptor set: input planes (read-only) and output planes (write-only). */
516  desc_set = (FFVulkanDescriptorSetBinding []) {
517  {
518  .name = "input_img",
519  .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
520  .mem_layout = ff_vk_shader_rep_fmt(vkctx->input_format, FF_VK_REP_FLOAT),
521  .mem_quali = "readonly",
522  .dimensions = 2,
523  .elems = planes,
524  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
525  },
526  {
527  .name = "output_img",
528  .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
529  .mem_layout = ff_vk_shader_rep_fmt(vkctx->output_format, FF_VK_REP_FLOAT),
530  .mem_quali = "writeonly",
531  .dimensions = 2,
532  .elems = planes,
533  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
534  },
535  };
536  RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 2, 0, 0));
537 
/* Second descriptor set: the accumulated weights and sums, read-only here. */
538  desc_set = (FFVulkanDescriptorSetBinding []) {
539  {
540  .name = "weights_buffer",
541  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
542  .mem_quali = "readonly",
543  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
544  .buf_content = "float weights[];",
545  },
546  {
547  .name = "sums_buffer",
548  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
549  .mem_quali = "readonly",
550  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
551  .buf_content = "float sums[];",
552  },
553  };
554 
555  RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 2, 0, 0));
556 
557  GLSLC(0, void main() );
558  GLSLC(0, { );
559  GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
/* Workgroup z selects the plane. */
560  GLSLC(1, const uint plane = uint(gl_WorkGroupID.z); );
561  GLSLC(1, const uvec2 size = imageSize(output_img[plane]); );
562  GLSLC(0, );
563  GLSLC(1, uint c_off; );
564  GLSLC(1, uint c_plane; );
565  GLSLC(1, uint ws_off; );
566  GLSLC(0, );
567  GLSLC(1, float w_sum; );
568  GLSLC(1, float sum; );
569  GLSLC(1, vec4 src; );
570  GLSLC(1, vec4 r; );
571  GLSLC(1, uint invoc_idx; );
572  GLSLC(1, uint comp_idx; );
573  GLSLC(0, );
574  GLSLC(1, if (!IS_WITHIN(pos, size)) );
575  GLSLC(2, return; );
576  GLSLC(0, );
577  GLSLC(1, src = imageLoad(input_img[plane], pos); );
/* Only components that live in this plane contribute a channel of r. */
578  GLSLC(1, for (comp_idx = 0; comp_idx < nb_components; comp_idx++) { );
579  GLSLC(2, if (plane == comp_plane[comp_idx]) { );
580  GLSLC(3, w_sum = 0.0; );
581  GLSLC(3, sum = 0.0; );
/* Add up the slices written by each offset group. */
582  GLSLC(3, for (invoc_idx = 0; invoc_idx < t; invoc_idx++) { );
583  GLSLC(4, ws_off = ws_count * invoc_idx + ws_offset[comp_idx] + pos.y * ws_stride[comp_idx] + pos.x; );
584  GLSLC(4, w_sum += weights[ws_off]; );
585  GLSLC(4, sum += sums[ws_off]; );
586  GLSLC(3, } );
587  GLSLC(3, c_off = comp_off[comp_idx]; );
/* The source pixel itself is included with weight 1. */
588  GLSLC(3, r[c_off] = (sum + src[c_off] * 255) / (1.0 + w_sum) / 255; );
589  GLSLC(2, } );
590  GLSLC(1, } );
/* NOTE(review): channels of r not assigned in the loop above are stored
 * without having been initialised - confirm this is harmless for all formats. */
591  GLSLC(1, imageStore(output_img[plane], pos, r); );
592  GLSLC(0, } );
593 
594  RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", &spv_opaque));
595  RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main"));
596 
597  RET(ff_vk_shader_register_exec(vkctx, exec, shd));
598 
/* Reached on success and on failure; err holds the result of the last RET(). */
599 fail:
600  if (spv_opaque)
601  spv->free_shader(spv, &spv_opaque);
602 
603  return err;
604 }
605 
607 {
608  int rad, err;
609  int xcnt = 0, ycnt = 0;
610  NLMeansVulkanContext *s = ctx->priv;
611  FFVulkanContext *vkctx = &s->vkctx;
612  const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
613  FFVkSPIRVCompiler *spv = NULL;
614  int *offsets_buf;
615  int offsets_dispatched = 0, nb_dispatches = 0;
616 
617  const AVPixFmtDescriptor *desc;
619  if (!desc)
620  return AVERROR(EINVAL);
621 
622  if (!(s->opts.r & 1)) {
623  s->opts.r |= 1;
624  av_log(ctx, AV_LOG_WARNING, "Research size should be odd, setting to %i",
625  s->opts.r);
626  }
627 
628  if (!(s->opts.p & 1)) {
629  s->opts.p |= 1;
630  av_log(ctx, AV_LOG_WARNING, "Patch size should be odd, setting to %i",
631  s->opts.p);
632  }
633 
634  for (int i = 0; i < 4; i++) {
635  double str = !isnan(s->opts.sc[i]) ? s->opts.sc[i] : s->opts.s;
636  int ps = (s->opts.pc[i] ? s->opts.pc[i] : s->opts.p);
637  if (str == 0.0) {
638  s->strength[i] = 0.0;
639  } else {
640  str = 10.0f*str;
641  str *= -str;
642  str = 255.0*255.0 / str;
643  s->strength[i] = str;
644  }
645  if (!(ps & 1)) {
646  ps |= 1;
647  av_log(ctx, AV_LOG_WARNING, "Patch size should be odd, setting to %i",
648  ps);
649  }
650  s->patch[i] = ps / 2;
651  }
652 
653  rad = s->opts.r/2;
654  s->nb_offsets = (2*rad + 1)*(2*rad + 1) - 1;
655  s->xoffsets = av_malloc(s->nb_offsets*sizeof(*s->xoffsets));
656  s->yoffsets = av_malloc(s->nb_offsets*sizeof(*s->yoffsets));
657  s->nb_offsets = 0;
658 
659  for (int x = -rad; x <= rad; x++) {
660  for (int y = -rad; y <= rad; y++) {
661  if (!x && !y)
662  continue;
663 
664  s->xoffsets[xcnt++] = x;
665  s->yoffsets[ycnt++] = y;
666  s->nb_offsets++;
667  }
668  }
669 
670  RET(ff_vk_create_buf(&s->vkctx, &s->xyoffsets_buf, 2*s->nb_offsets*sizeof(int32_t), NULL, NULL,
671  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
672  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
673  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
674  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
675  RET(ff_vk_map_buffer(&s->vkctx, &s->xyoffsets_buf, (uint8_t **)&offsets_buf, 0));
676 
677  for (int i = 0; i < 2*s->nb_offsets; i += 2) {
678  offsets_buf[i + 0] = s->xoffsets[i >> 1];
679  offsets_buf[i + 1] = s->yoffsets[i >> 1];
680  }
681 
682  RET(ff_vk_unmap_buffer(&s->vkctx, &s->xyoffsets_buf, 1));
683 
684  s->opts.t = FFMIN(s->opts.t, (FFALIGN(s->nb_offsets, TYPE_ELEMS) / TYPE_ELEMS));
685 
686  spv = ff_vk_spirv_init();
687  if (!spv) {
688  av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
689  return AVERROR_EXTERNAL;
690  }
691 
692  s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0);
693  if (!s->qf) {
694  av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n");
695  err = AVERROR(ENOTSUP);
696  goto fail;
697  }
698 
699  RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, 1, 0, 0, 0, NULL));
700 
701  RET(init_integral_pipeline(vkctx, &s->e, &s->shd_horizontal, &s->shd_vertical,
702  spv, desc, planes));
703 
704  RET(init_weights_pipeline(vkctx, &s->e, &s->shd_weights, spv, desc, planes));
705 
706  RET(init_denoise_pipeline(vkctx, &s->e, &s->shd_denoise, spv, desc, planes));
707 
708  RET(ff_vk_shader_update_desc_buffer(vkctx, &s->e.contexts[0], &s->shd_vertical,
709  1, 0, 0,
710  &s->xyoffsets_buf, 0, s->xyoffsets_buf.size,
711  VK_FORMAT_UNDEFINED));
712 
713  RET(ff_vk_shader_update_desc_buffer(vkctx, &s->e.contexts[0], &s->shd_weights,
714  1, 0, 0,
715  &s->xyoffsets_buf, 0, s->xyoffsets_buf.size,
716  VK_FORMAT_UNDEFINED));
717 
718  do {
719  int wg_invoc = FFMIN((s->nb_offsets - offsets_dispatched)/TYPE_ELEMS, s->opts.t);
720  offsets_dispatched += wg_invoc * TYPE_ELEMS;
721  nb_dispatches++;
722  } while (offsets_dispatched < s->nb_offsets);
723 
724  av_log(ctx, AV_LOG_VERBOSE, "Filter initialized, %i x/y offsets, %i dispatches\n",
725  s->nb_offsets, nb_dispatches);
726 
727  s->initialized = 1;
728 
729 fail:
730  if (spv)
731  spv->uninit(&spv);
732 
733  return err;
734 }
735 
737  FFVkBuffer *ws_vk, uint32_t comp_offs[4], uint32_t comp_planes[4],
738  uint32_t ws_offset[4], uint32_t ws_stride[4],
739  uint32_t ws_count, uint32_t t, uint32_t nb_components)
740 {
/*
 * Record the final denoise pass into exec: bind the denoise shader, upload
 * its push constants, insert a buffer barrier so that earlier accesses to
 * the weights/sums buffer complete before the compute-shader reads, and
 * dispatch one workgroup grid over the output frame for every plane.
 * Always returns 0.
 */
741  FFVulkanContext *vkctx = &s->vkctx;
742  FFVulkanFunctions *vk = &vkctx->vkfn;
743  VkBufferMemoryBarrier2 buf_bar[2];
744  int nb_buf_bar = 0;
745 
/* Positional initialiser: the order has to follow the DenoisePushData members. */
746  DenoisePushData pd = {
747  { comp_offs[0], comp_offs[1], comp_offs[2], comp_offs[3] },
748  { comp_planes[0], comp_planes[1], comp_planes[2], comp_planes[3] },
749  { ws_offset[0], ws_offset[1], ws_offset[2], ws_offset[3] },
750  { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] },
751  ws_count,
752  t,
753  nb_components,
754  };
755 
756  /* Denoise pass pipeline */
757  ff_vk_exec_bind_shader(vkctx, exec, &s->shd_denoise);
758 
759  /* Push data */
760  ff_vk_shader_update_push_const(vkctx, exec, &s->shd_denoise,
761  VK_SHADER_STAGE_COMPUTE_BIT,
762  0, sizeof(pd), &pd);
763 
/* Transition the whole weights/sums buffer from its last recorded
 * stage/access to compute-shader storage reads. */
764  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
765  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
766  .srcStageMask = ws_vk->stage,
767  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
768  .srcAccessMask = ws_vk->access,
769  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT,
770  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
771  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
772  .buffer = ws_vk->buf,
773  .size = ws_vk->size,
774  .offset = 0,
775  };
776 
777  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
778  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
779  .pBufferMemoryBarriers = buf_bar,
780  .bufferMemoryBarrierCount = nb_buf_bar,
781  });
/* Remember the new state so that the next barrier on this buffer starts from it. */
782  ws_vk->stage = buf_bar[0].dstStageMask;
783  ws_vk->access = buf_bar[0].dstAccessMask;
784 
785  /* End of denoise pass */
/* x/y: output size rounded up to whole workgroups; z: one layer per plane. */
786  vk->CmdDispatch(exec->buf,
787  FFALIGN(vkctx->output_width, s->shd_denoise.lg_size[0])/s->shd_denoise.lg_size[0],
788  FFALIGN(vkctx->output_height, s->shd_denoise.lg_size[1])/s->shd_denoise.lg_size[1],
789  av_pix_fmt_count_planes(s->vkctx.output_format));
790 
791  return 0;
792 }
793 
795 {
796  int err;
797  AVFrame *out = NULL;
798  AVFilterContext *ctx = link->dst;
799  NLMeansVulkanContext *s = ctx->priv;
800  AVFilterLink *outlink = ctx->outputs[0];
801  FFVulkanContext *vkctx = &s->vkctx;
802  FFVulkanFunctions *vk = &vkctx->vkfn;
803 
804  const AVPixFmtDescriptor *desc;
805  int comp_offs[4];
806  int comp_planes[4];
807  int plane_widths[4];
808  int plane_heights[4];
809 
810  int offsets_dispatched = 0;
811 
812  /* Integral */
813  AVBufferRef *integral_buf = NULL;
814  FFVkBuffer *integral_vk;
815  size_t int_stride;
816  size_t int_size;
817 
818  /* Weights/sums */
819  AVBufferRef *ws_buf = NULL;
820  FFVkBuffer *ws_vk;
821  uint32_t ws_count = 0;
822  uint32_t ws_offset[4];
823  uint32_t ws_stride[4];
824  size_t ws_size;
825 
826  FFVkExecContext *exec;
827  VkImageView in_views[AV_NUM_DATA_POINTERS];
828  VkImageView out_views[AV_NUM_DATA_POINTERS];
829  VkImageMemoryBarrier2 img_bar[8];
830  int nb_img_bar = 0;
831  VkBufferMemoryBarrier2 buf_bar[2];
832  int nb_buf_bar = 0;
833 
834  if (!s->initialized)
835  RET(init_filter(ctx));
836 
838  if (!desc)
839  return AVERROR(EINVAL);
840 
841  /* Integral image */
842  int_stride = FFALIGN(vkctx->output_width, s->shd_vertical.lg_size[0]) * TYPE_SIZE;
843  int_size = FFALIGN(vkctx->output_height, s->shd_horizontal.lg_size[0]) * int_stride;
844 
845  /* Plane dimensions */
846  for (int i = 0; i < desc->nb_components; i++) {
847  plane_widths[i] = !i || (i == 3) ? vkctx->output_width : AV_CEIL_RSHIFT(vkctx->output_width, desc->log2_chroma_w);
848  plane_heights[i] = !i || (i == 3) ? vkctx->output_height : AV_CEIL_RSHIFT(vkctx->output_height, desc->log2_chroma_h);
849  plane_widths[i] = FFALIGN(plane_widths[i], s->shd_denoise.lg_size[0]);
850  plane_heights[i] = FFALIGN(plane_heights[i], s->shd_denoise.lg_size[1]);
851 
852  comp_offs[i] = desc->comp[i].offset / (FFALIGN(desc->comp[i].depth, 8)/8);
853  comp_planes[i] = desc->comp[i].plane;
854 
855  ws_stride[i] = plane_widths[i];
856  ws_offset[i] = ws_count;
857  ws_count += ws_stride[i] * plane_heights[i];
858  }
859 
860  ws_size = ws_count * sizeof(float);
861 
862  /* Buffers */
863  err = ff_vk_get_pooled_buffer(&s->vkctx, &s->integral_buf_pool, &integral_buf,
864  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
865  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
866  NULL,
867  int_size * s->opts.t * desc->nb_components,
868  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
869  if (err < 0)
870  return err;
871  integral_vk = (FFVkBuffer *)integral_buf->data;
872 
873  err = ff_vk_get_pooled_buffer(&s->vkctx, &s->ws_buf_pool, &ws_buf,
874  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
875  VK_BUFFER_USAGE_TRANSFER_DST_BIT |
876  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
877  NULL,
878  ws_size * s-> opts.t * 2,
879  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
880  if (err < 0)
881  return err;
882  ws_vk = (FFVkBuffer *)ws_buf->data;
883 
884  /* Output frame */
885  out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
886  if (!out) {
887  err = AVERROR(ENOMEM);
888  goto fail;
889  }
890 
891  /* Execution context */
892  exec = ff_vk_exec_get(&s->vkctx, &s->e);
893  ff_vk_exec_start(vkctx, exec);
894 
895  /* Dependencies */
896  RET(ff_vk_exec_add_dep_frame(vkctx, exec, in,
897  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
898  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
899  RET(ff_vk_exec_add_dep_frame(vkctx, exec, out,
900  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
901  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
902 
903  RET(ff_vk_exec_add_dep_buf(vkctx, exec, &integral_buf, 1, 0));
904  integral_buf = NULL;
905 
906  RET(ff_vk_exec_add_dep_buf(vkctx, exec, &ws_buf, 1, 0));
907  ws_buf = NULL;
908 
909  /* Input frame prep */
910  RET(ff_vk_create_imageviews(vkctx, exec, in_views, in, FF_VK_REP_FLOAT));
911  ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar,
912  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
913  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
914  VK_ACCESS_SHADER_READ_BIT,
915  VK_IMAGE_LAYOUT_GENERAL,
916  VK_QUEUE_FAMILY_IGNORED);
917 
918  /* Output frame prep */
919  RET(ff_vk_create_imageviews(vkctx, exec, out_views, out, FF_VK_REP_FLOAT));
920  ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar,
921  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
922  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
923  VK_ACCESS_SHADER_WRITE_BIT,
924  VK_IMAGE_LAYOUT_GENERAL,
925  VK_QUEUE_FAMILY_IGNORED);
926 
927  nb_buf_bar = 0;
928  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
929  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
930  .srcStageMask = ws_vk->stage,
931  .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
932  .srcAccessMask = ws_vk->access,
933  .dstAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
934  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
935  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
936  .buffer = ws_vk->buf,
937  .size = ws_vk->size,
938  .offset = 0,
939  };
940 
941  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
942  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
943  .pImageMemoryBarriers = img_bar,
944  .imageMemoryBarrierCount = nb_img_bar,
945  .pBufferMemoryBarriers = buf_bar,
946  .bufferMemoryBarrierCount = nb_buf_bar,
947  });
948  ws_vk->stage = buf_bar[0].dstStageMask;
949  ws_vk->access = buf_bar[0].dstAccessMask;
950 
951  /* Buffer zeroing */
952  vk->CmdFillBuffer(exec->buf, ws_vk->buf, 0, ws_vk->size, 0x0);
953 
954  /* Update integral descriptors */
955  ff_vk_shader_update_img_array(vkctx, exec, &s->shd_vertical, in, in_views, 0, 0,
956  VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE);
957  /* Update weights descriptors */
958  ff_vk_shader_update_img_array(vkctx, exec, &s->shd_weights, in, in_views, 0, 0,
959  VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE);
960  RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_weights, 0, 1, 0,
961  ws_vk, 0, ws_size * s-> opts.t,
962  VK_FORMAT_UNDEFINED));
963  RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_weights, 0, 2, 0,
964  ws_vk, ws_size * s-> opts.t, ws_size * s-> opts.t,
965  VK_FORMAT_UNDEFINED));
966 
967  /* Update denoise descriptors */
968  ff_vk_shader_update_img_array(vkctx, exec, &s->shd_denoise, in, in_views, 0, 0,
969  VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE);
970  ff_vk_shader_update_img_array(vkctx, exec, &s->shd_denoise, out, out_views, 0, 1,
971  VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE);
972  RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_denoise, 1, 0, 0,
973  ws_vk, 0, ws_size * s-> opts.t,
974  VK_FORMAT_UNDEFINED));
975  RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_denoise, 1, 1, 0,
976  ws_vk, ws_size * s-> opts.t, ws_size * s-> opts.t,
977  VK_FORMAT_UNDEFINED));
978 
979  do {
980  int wg_invoc = FFMIN((s->nb_offsets - offsets_dispatched)/TYPE_ELEMS, s->opts.t);
981 
982  /* Integral pipeline */
983  IntegralPushData pd = {
984  { plane_widths[0], plane_widths[1], plane_widths[2], plane_widths[3] },
985  { plane_heights[0], plane_heights[1], plane_heights[2], plane_heights[3] },
986  { s->strength[0], s->strength[1], s->strength[2], s->strength[3], },
987  { comp_offs[0], comp_offs[1], comp_offs[2], comp_offs[3] },
988  { comp_planes[0], comp_planes[1], comp_planes[2], comp_planes[3] },
989  integral_vk->address,
990  (uint64_t)int_size,
991  (uint64_t)int_stride,
992  offsets_dispatched,
993  desc->nb_components,
994  };
995 
996  ff_vk_exec_bind_shader(vkctx, exec, &s->shd_vertical);
997  ff_vk_shader_update_push_const(vkctx, exec, &s->shd_vertical,
998  VK_SHADER_STAGE_COMPUTE_BIT,
999  0, sizeof(pd), &pd);
1000 
1001  nb_buf_bar = 0;
1002  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
1003  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
1004  .srcStageMask = integral_vk->stage,
1005  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
1006  .srcAccessMask = integral_vk->access,
1007  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
1008  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
1009  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
1010  .buffer = integral_vk->buf,
1011  .size = integral_vk->size,
1012  .offset = 0,
1013  };
1014  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
1015  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
1016  .pBufferMemoryBarriers = buf_bar,
1017  .bufferMemoryBarrierCount = nb_buf_bar,
1018  });
1019  integral_vk->stage = buf_bar[0].dstStageMask;
1020  integral_vk->access = buf_bar[0].dstAccessMask;
1021 
1022  /* End of vertical pass */
1023  vk->CmdDispatch(exec->buf,
1024  FFALIGN(vkctx->output_width, s->shd_vertical.lg_size[0])/s->shd_vertical.lg_size[0],
1025  desc->nb_components,
1026  wg_invoc);
1027 
1028  ff_vk_exec_bind_shader(vkctx, exec, &s->shd_horizontal);
1029  ff_vk_shader_update_push_const(vkctx, exec, &s->shd_horizontal,
1030  VK_SHADER_STAGE_COMPUTE_BIT,
1031  0, sizeof(pd), &pd);
1032 
1033  nb_buf_bar = 0;
1034  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
1035  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
1036  .srcStageMask = integral_vk->stage,
1037  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
1038  .srcAccessMask = integral_vk->access,
1039  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
1040  VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
1041  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
1042  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
1043  .buffer = integral_vk->buf,
1044  .size = integral_vk->size,
1045  .offset = 0,
1046  };
1047  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
1048  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
1049  .pBufferMemoryBarriers = buf_bar,
1050  .bufferMemoryBarrierCount = nb_buf_bar,
1051  });
1052  integral_vk->stage = buf_bar[0].dstStageMask;
1053  integral_vk->access = buf_bar[0].dstAccessMask;
1054 
1055  /* End of horizontal pass */
1056  vk->CmdDispatch(exec->buf,
1057  FFALIGN(vkctx->output_height, s->shd_horizontal.lg_size[0])/s->shd_horizontal.lg_size[0],
1058  desc->nb_components,
1059  wg_invoc);
1060 
1061  /* Weights pipeline */
1062  WeightsPushData wpd = {
1063  { plane_widths[0], plane_widths[1], plane_widths[2], plane_widths[3] },
1064  { plane_heights[0], plane_heights[1], plane_heights[2], plane_heights[3] },
1065  { ws_offset[0], ws_offset[1], ws_offset[2], ws_offset[3] },
1066  { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] },
1067  { s->patch[0], s->patch[1], s->patch[2], s->patch[3] },
1068  { s->strength[0], s->strength[1], s->strength[2], s->strength[3], },
1069  { comp_offs[0], comp_offs[1], comp_offs[2], comp_offs[3] },
1070  { comp_planes[0], comp_planes[1], comp_planes[2], comp_planes[3] },
1071  integral_vk->address,
1072  (uint64_t)int_size,
1073  (uint64_t)int_stride,
1074  offsets_dispatched,
1075  ws_count,
1076  desc->nb_components,
1077  };
1078 
1079  ff_vk_exec_bind_shader(vkctx, exec, &s->shd_weights);
1080  ff_vk_shader_update_push_const(vkctx, exec, &s->shd_weights,
1081  VK_SHADER_STAGE_COMPUTE_BIT,
1082  0, sizeof(wpd), &wpd);
1083 
1084  nb_buf_bar = 0;
1085  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
1086  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
1087  .srcStageMask = integral_vk->stage,
1088  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
1089  .srcAccessMask = integral_vk->access,
1090  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT,
1091  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
1092  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
1093  .buffer = integral_vk->buf,
1094  .size = integral_vk->size,
1095  .offset = 0,
1096  };
1097  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
1098  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
1099  .srcStageMask = ws_vk->stage,
1100  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
1101  .srcAccessMask = ws_vk->access,
1102  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
1103  VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
1104  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
1105  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
1106  .buffer = ws_vk->buf,
1107  .size = ws_vk->size,
1108  .offset = 0,
1109  };
1110  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
1111  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
1112  .pBufferMemoryBarriers = buf_bar,
1113  .bufferMemoryBarrierCount = nb_buf_bar,
1114  });
1115  integral_vk->stage = buf_bar[0].dstStageMask;
1116  integral_vk->access = buf_bar[0].dstAccessMask;
1117  ws_vk->stage = buf_bar[1].dstStageMask;
1118  ws_vk->access = buf_bar[1].dstAccessMask;
1119 
1120  /* End of weights pass */
1121  vk->CmdDispatch(exec->buf,
1122  FFALIGN(vkctx->output_width, s->shd_weights.lg_size[0])/s->shd_weights.lg_size[0],
1123  FFALIGN(vkctx->output_height, s->shd_weights.lg_size[1])/s->shd_weights.lg_size[1],
1124  wg_invoc * desc->nb_components);
1125 
1126  offsets_dispatched += wg_invoc * TYPE_ELEMS;
1127  } while (offsets_dispatched < s->nb_offsets);
1128 
1129  RET(denoise_pass(s, exec, ws_vk, comp_offs, comp_planes, ws_offset, ws_stride,
1130  ws_count, s->opts.t, desc->nb_components));
1131 
1132  err = ff_vk_exec_submit(vkctx, exec);
1133  if (err < 0)
1134  return err;
1135 
1136  err = av_frame_copy_props(out, in);
1137  if (err < 0)
1138  goto fail;
1139 
1140  av_frame_free(&in);
1141 
1142  return ff_filter_frame(outlink, out);
1143 
1144 fail:
1145  av_buffer_unref(&integral_buf);
1146  av_buffer_unref(&ws_buf);
1147  av_frame_free(&in);
1148  av_frame_free(&out);
1149  return err;
1150 }
1151 
1153 {
1154  NLMeansVulkanContext *s = avctx->priv;
1155  FFVulkanContext *vkctx = &s->vkctx;
1156 
1157  ff_vk_exec_pool_free(vkctx, &s->e);
1158  ff_vk_shader_free(vkctx, &s->shd_horizontal);
1159  ff_vk_shader_free(vkctx, &s->shd_vertical);
1160  ff_vk_shader_free(vkctx, &s->shd_weights);
1161  ff_vk_shader_free(vkctx, &s->shd_denoise);
1162 
1163  av_buffer_pool_uninit(&s->integral_buf_pool);
1164  av_buffer_pool_uninit(&s->ws_buf_pool);
1165 
1166  ff_vk_uninit(&s->vkctx);
1167 
1168  av_freep(&s->xoffsets);
1169  av_freep(&s->yoffsets);
1170 
1171  s->initialized = 0;
1172 }
1173 
1174 #define OFFSET(x) offsetof(NLMeansVulkanContext, x)
1175 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
1177  { "s", "denoising strength for all components", OFFSET(opts.s), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 0.0, 100.0, FLAGS },
1178  { "p", "patch size for all components", OFFSET(opts.p), AV_OPT_TYPE_INT, { .i64 = 3*2+1 }, 0, 99, FLAGS },
1179  { "r", "research window size", OFFSET(opts.r), AV_OPT_TYPE_INT, { .i64 = 7*2+1 }, 0, 99, FLAGS },
1180  { "t", "parallelism", OFFSET(opts.t), AV_OPT_TYPE_INT, { .i64 = 8 }, 1, 64, FLAGS },
1181 
1182  { "s1", "denoising strength for component 1", OFFSET(opts.sc[0]), AV_OPT_TYPE_DOUBLE, { .dbl = NAN }, 0.0, 100.0, FLAGS },
1183  { "s2", "denoising strength for component 2", OFFSET(opts.sc[1]), AV_OPT_TYPE_DOUBLE, { .dbl = NAN }, 0.0, 100.0, FLAGS },
1184  { "s3", "denoising strength for component 3", OFFSET(opts.sc[2]), AV_OPT_TYPE_DOUBLE, { .dbl = NAN }, 0.0, 100.0, FLAGS },
1185  { "s4", "denoising strength for component 4", OFFSET(opts.sc[3]), AV_OPT_TYPE_DOUBLE, { .dbl = NAN }, 0.0, 100.0, FLAGS },
1186 
1187  { "p1", "patch size for component 1", OFFSET(opts.pc[0]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
1188  { "p2", "patch size for component 2", OFFSET(opts.pc[1]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
1189  { "p3", "patch size for component 3", OFFSET(opts.pc[2]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
1190  { "p4", "patch size for component 4", OFFSET(opts.pc[3]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
1191 
1192  { NULL }
1193 };
1194 
1195 AVFILTER_DEFINE_CLASS(nlmeans_vulkan);
1196 
1198  {
1199  .name = "default",
1200  .type = AVMEDIA_TYPE_VIDEO,
1201  .filter_frame = &nlmeans_vulkan_filter_frame,
1202  .config_props = &ff_vk_filter_config_input,
1203  },
1204 };
1205 
1207  {
1208  .name = "default",
1209  .type = AVMEDIA_TYPE_VIDEO,
1210  .config_props = &ff_vk_filter_config_output,
1211  },
1212 };
1213 
1215  .p.name = "nlmeans_vulkan",
1216  .p.description = NULL_IF_CONFIG_SMALL("Non-local means denoiser (Vulkan)"),
1217  .p.priv_class = &nlmeans_vulkan_class,
1218  .p.flags = AVFILTER_FLAG_HWDEVICE,
1219  .priv_size = sizeof(NLMeansVulkanContext),
1220  .init = &ff_vk_filter_init,
1225  .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
1226 };
ff_get_video_buffer
AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)
Request a picture buffer with a specific set of permissions.
Definition: video.c:117
NLMeansVulkanContext::nlmeans_opts::p
int p
Definition: vf_nlmeans_vulkan.c:64
DenoisePushData::comp_plane
uint32_t comp_plane[4]
Definition: vf_nlmeans_vulkan.c:479
ff_vk_create_buf
int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext, void *alloc_pNext, VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
Definition: vulkan.c:1021
AV_LOG_WARNING
#define AV_LOG_WARNING
Something somehow does not look correct.
Definition: log.h:216
WeightsPushData::int_stride
uint64_t int_stride
Definition: vf_nlmeans_vulkan.c:301
AVFILTER_DEFINE_CLASS
AVFILTER_DEFINE_CLASS(nlmeans_vulkan)
FFVulkanContext::output_height
int output_height
Definition: vulkan.h:318
r
const char * r
Definition: vf_curves.c:127
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
ff_vf_nlmeans_vulkan
const FFFilter ff_vf_nlmeans_vulkan
Definition: vf_nlmeans_vulkan.c:1214
opt.h
NLMeansVulkanContext::opts
struct NLMeansVulkanContext::nlmeans_opts opts
ff_vk_shader_free
void ff_vk_shader_free(FFVulkanContext *s, FFVulkanShader *shd)
Free a shader.
Definition: vulkan.c:2923
ff_vk_shader_init
int ff_vk_shader_init(FFVulkanContext *s, FFVulkanShader *shd, const char *name, VkPipelineStageFlags stage, const char *extensions[], int nb_extensions, int lg_x, int lg_y, int lg_z, uint32_t required_subgroup_size)
Initialize a shader object, with a specific set of extensions, type+bind, local group size,...
Definition: vulkan.c:2056
out
FILE * out
Definition: movenc.c:55
NLMeansVulkanContext::shd_weights
FFVulkanShader shd_weights
Definition: vf_nlmeans_vulkan.c:51
AVBufferPool
The buffer pool.
Definition: buffer_internal.h:88
WeightsPushData::strength
float strength[4]
Definition: vf_nlmeans_vulkan.c:296
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1067
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:3447
AVBufferRef::data
uint8_t * data
The data buffer.
Definition: buffer.h:90
RET
#define RET(x)
Definition: vulkan.h:66
ff_vk_exec_pool_init
int ff_vk_exec_pool_init(FFVulkanContext *s, AVVulkanDeviceQueueFamily *qf, FFVkExecPool *pool, int nb_contexts, int nb_queries, VkQueryType query_type, int query_64bit, const void *query_create_pnext)
Allocates/frees an execution pool.
Definition: vulkan.c:356
FFVkBuffer::access
VkAccessFlags2 access
Definition: vulkan.h:96
WeightsPushData
Definition: vf_nlmeans_vulkan.c:290
FFVkBuffer::stage
VkPipelineStageFlags2 stage
Definition: vulkan.h:95
IntegralPushData::height
uint32_t height[4]
Definition: vf_nlmeans_vulkan.c:72
av_frame_free
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:64
NLMeansVulkanContext::integral_buf_pool
AVBufferPool * integral_buf_pool
Definition: vf_nlmeans_vulkan.c:44
planes
static const struct @532 planes[]
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: filters.h:263
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:427
ff_vk_filter_init
int ff_vk_filter_init(AVFilterContext *avctx)
General lavfi IO functions.
Definition: vulkan_filter.c:233
ff_vk_map_buffer
static int ff_vk_map_buffer(FFVulkanContext *s, FFVkBuffer *buf, uint8_t **mem, int invalidate)
Definition: vulkan.h:534
w
uint8_t w
Definition: llviddspenc.c:38
NLMeansVulkanContext::xoffsets
int * xoffsets
Definition: vf_nlmeans_vulkan.c:54
AVOption
AVOption.
Definition: opt.h:429
b
#define b
Definition: input.c:42
data
const char data[16]
Definition: mxf.c:149
AV_LOG_VERBOSE
#define AV_LOG_VERBOSE
Detailed information.
Definition: log.h:226
NLMeansVulkanContext::shd_horizontal
FFVulkanShader shd_horizontal
Definition: vf_nlmeans_vulkan.c:49
FFVkBuffer::address
VkDeviceAddress address
Definition: vulkan.h:92
NLMeansVulkanContext::shd_vertical
FFVulkanShader shd_vertical
Definition: vf_nlmeans_vulkan.c:50
ff_vk_exec_get
FFVkExecContext * ff_vk_exec_get(FFVulkanContext *s, FFVkExecPool *pool)
Retrieve an execution pool.
Definition: vulkan.c:547
NLMeansVulkanContext::strength
float strength[4]
Definition: vf_nlmeans_vulkan.c:57
ff_vk_uninit
void ff_vk_uninit(FFVulkanContext *s)
Frees main context.
Definition: vulkan.c:2964
FFVkSPIRVCompiler::uninit
void(* uninit)(struct FFVkSPIRVCompiler **ctx)
Definition: vulkan_spirv.h:32
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:220
NLMeansVulkanContext::initialized
int initialized
Definition: vf_nlmeans_vulkan.c:40
IntegralPushData::int_stride
uint64_t int_stride
Definition: vf_nlmeans_vulkan.c:78
DenoisePushData::comp_off
uint32_t comp_off[4]
Definition: vf_nlmeans_vulkan.c:478
WeightsPushData::ws_offset
uint32_t ws_offset[4]
Definition: vf_nlmeans_vulkan.c:293
video.h
IntegralPushData::width
uint32_t width[4]
Definition: vf_nlmeans_vulkan.c:71
AV_PIX_FMT_VULKAN
@ AV_PIX_FMT_VULKAN
Vulkan hardware images.
Definition: pixfmt.h:379
ff_vk_exec_add_dep_frame
int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, VkPipelineStageFlagBits2 wait_stage, VkPipelineStageFlagBits2 signal_stage)
Definition: vulkan.c:779
FFVkBuffer::buf
VkBuffer buf
Definition: vulkan.h:88
av_malloc
#define av_malloc(s)
Definition: tableprint_vlc.h:31
NLMeansVulkanContext::yoffsets
int * yoffsets
Definition: vf_nlmeans_vulkan.c:55
av_pix_fmt_count_planes
int av_pix_fmt_count_planes(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:3487
AVFilterContext::priv
void * priv
private data for use by the filter
Definition: avfilter.h:289
fail
#define fail()
Definition: checkasm.h:206
vulkan_filter.h
ff_vk_shader_update_img_array
void ff_vk_shader_update_img_array(FFVulkanContext *s, FFVkExecContext *e, FFVulkanShader *shd, AVFrame *f, VkImageView *views, int set, int binding, VkImageLayout layout, VkSampler sampler)
Update a descriptor in a buffer with an image array.
Definition: vulkan.c:2800
ff_vk_shader_register_exec
int ff_vk_shader_register_exec(FFVulkanContext *s, FFVkExecPool *pool, FFVulkanShader *shd)
Register a shader with an exec pool.
Definition: vulkan.c:2563
NLMeansVulkanContext::shd_denoise
FFVulkanShader shd_denoise
Definition: vf_nlmeans_vulkan.c:52
ff_vk_shader_add_descriptor_set
int ff_vk_shader_add_descriptor_set(FFVulkanContext *s, FFVulkanShader *shd, FFVulkanDescriptorSetBinding *desc, int nb, int singular, int print_to_shader_only)
Add descriptor to a shader.
Definition: vulkan.c:2428
DenoisePushData::ws_offset
uint32_t ws_offset[4]
Definition: vf_nlmeans_vulkan.c:480
AVFilterPad
A filter pad used for either input or output.
Definition: filters.h:39
GLSLC
#define GLSLC(N, S)
Definition: vulkan.h:43
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:210
av_cold
#define av_cold
Definition: attributes.h:106
main
int main
Definition: dovi_rpuenc.c:38
WeightsPushData::integral_size
uint64_t integral_size
Definition: vf_nlmeans_vulkan.c:300
FFFilter
Definition: filters.h:266
float
float
Definition: af_crystalizer.c:122
FFVulkanContext::output_width
int output_width
Definition: vulkan.h:317
NLMeansVulkanContext::ws_buf_pool
AVBufferPool * ws_buf_pool
Definition: vf_nlmeans_vulkan.c:45
s
#define s(width, name)
Definition: cbs_vp9.c:198
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:60
shared_shd_def
static void shared_shd_def(FFVulkanShader *shd)
Definition: vf_nlmeans_vulkan.c:83
WeightsPushData::xyoffs_start
uint32_t xyoffs_start
Definition: vf_nlmeans_vulkan.c:302
AV_OPT_TYPE_DOUBLE
@ AV_OPT_TYPE_DOUBLE
Underlying C type is double.
Definition: opt.h:267
FLAGS
#define FLAGS
Definition: vf_nlmeans_vulkan.c:1175
filters.h
FF_VK_REP_FLOAT
@ FF_VK_REP_FLOAT
Definition: vulkan.h:408
ctx
AVFormatContext * ctx
Definition: movenc.c:49
ff_vk_exec_add_dep_buf
int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef **deps, int nb_deps, int ref)
Execution dependency management.
Definition: vulkan.c:619
denoise_pass
static int denoise_pass(NLMeansVulkanContext *s, FFVkExecContext *exec, FFVkBuffer *ws_vk, uint32_t comp_offs[4], uint32_t comp_planes[4], uint32_t ws_offset[4], uint32_t ws_stride[4], uint32_t ws_count, uint32_t t, uint32_t nb_components)
Definition: vf_nlmeans_vulkan.c:736
ff_vk_exec_pool_free
void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool)
Definition: vulkan.c:287
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: filters.h:264
NAN
#define NAN
Definition: mathematics.h:115
link
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a link
Definition: filter_design.txt:23
WeightsPushData::nb_components
uint32_t nb_components
Definition: vf_nlmeans_vulkan.c:304
WeightsPushData::integral_base
VkDeviceAddress integral_base
Definition: vf_nlmeans_vulkan.c:299
IntegralPushData::integral_base
VkDeviceAddress integral_base
Definition: vf_nlmeans_vulkan.c:76
opts
AVDictionary * opts
Definition: movenc.c:51
ff_vk_shader_rep_fmt
const char * ff_vk_shader_rep_fmt(enum AVPixelFormat pix_fmt, enum FFVkShaderRepFormat rep_fmt)
Definition: vulkan.c:1589
NULL
#define NULL
Definition: coverity.c:32
av_frame_copy_props
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:599
av_buffer_unref
void av_buffer_unref(AVBufferRef **buf)
Free a given reference and automatically free the buffer if there are no more references to it.
Definition: buffer.c:139
WeightsPushData::height
uint32_t height[4]
Definition: vf_nlmeans_vulkan.c:292
nlmeans_vulkan_options
static const AVOption nlmeans_vulkan_options[]
Definition: vf_nlmeans_vulkan.c:1176
isnan
#define isnan(x)
Definition: libm.h:342
av_buffer_pool_uninit
void av_buffer_pool_uninit(AVBufferPool **ppool)
Mark the pool as being available for freeing.
Definition: buffer.c:328
ff_vk_filter_config_output
int ff_vk_filter_config_output(AVFilterLink *outlink)
Definition: vulkan_filter.c:209
FFVkBuffer::size
size_t size
Definition: vulkan.h:91
IntegralPushData::comp_off
uint32_t comp_off[4]
Definition: vf_nlmeans_vulkan.c:74
NLMeansVulkanContext::nlmeans_opts
Definition: vf_nlmeans_vulkan.c:60
FFVulkanContext
Definition: vulkan.h:274
WeightsPushData::comp_off
uint32_t comp_off[4]
Definition: vf_nlmeans_vulkan.c:297
WeightsPushData::ws_stride
uint32_t ws_stride[4]
Definition: vf_nlmeans_vulkan.c:294
exp
int8_t exp
Definition: eval.c:73
nlmeans_vulkan_inputs
static const AVFilterPad nlmeans_vulkan_inputs[]
Definition: vf_nlmeans_vulkan.c:1197
WG_SIZE
#define WG_SIZE
Definition: vf_nlmeans_vulkan.c:35
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
FF_FILTER_FLAG_HWFRAME_AWARE
#define FF_FILTER_FLAG_HWFRAME_AWARE
The filter is aware of hardware frames, and any hardware frame context should not be automatically pr...
Definition: filters.h:207
NLMeansVulkanContext::patch
int patch[4]
Definition: vf_nlmeans_vulkan.c:58
DenoisePushData
Definition: vf_nlmeans_vulkan.c:477
Block
Definition: flashsv2enc.c:70
NLMeansVulkanContext::qf
AVVulkanDeviceQueueFamily * qf
Definition: vf_nlmeans_vulkan.c:42
init
int(* init)(AVBSFContext *ctx)
Definition: dts2pts.c:368
NLMeansVulkanContext
Definition: vf_nlmeans_vulkan.c:37
ff_vk_shader_update_push_const
void ff_vk_shader_update_push_const(FFVulkanContext *s, FFVkExecContext *e, FFVulkanShader *shd, VkShaderStageFlagBits stage, int offset, size_t size, void *src)
Update push constant in a shader.
Definition: vulkan.c:2879
NLMeansVulkanContext::xyoffsets_buf
FFVkBuffer xyoffsets_buf
Definition: vf_nlmeans_vulkan.c:47
FFVulkanDescriptorSetBinding
Definition: vulkan.h:74
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:94
IntegralPushData::comp_plane
uint32_t comp_plane[4]
Definition: vf_nlmeans_vulkan.c:75
height
#define height
Definition: dsp.h:89
IntegralPushData
Definition: vf_nlmeans_vulkan.c:70
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:87
AVFILTER_FLAG_HWDEVICE
#define AVFILTER_FLAG_HWDEVICE
The filter can create hardware frames using AVFilterContext.hw_device_ctx.
Definition: avfilter.h:188
nlmeans_vulkan_outputs
static const AVFilterPad nlmeans_vulkan_outputs[]
Definition: vf_nlmeans_vulkan.c:1206
NLMeansVulkanContext::nlmeans_opts::t
int t
Definition: vf_nlmeans_vulkan.c:66
size
int size
Definition: twinvq_data.h:10344
AV_NUM_DATA_POINTERS
#define AV_NUM_DATA_POINTERS
Definition: frame.h:428
DenoisePushData::t
uint32_t t
Definition: vf_nlmeans_vulkan.c:483
FFVulkanShader
Definition: vulkan.h:190
FFVulkanContext::output_format
enum AVPixelFormat output_format
Definition: vulkan.h:319
nlmeans_vulkan_uninit
static void nlmeans_vulkan_uninit(AVFilterContext *avctx)
Definition: vf_nlmeans_vulkan.c:1152
FFVkSPIRVCompiler::compile_shader
int(* compile_shader)(FFVulkanContext *s, struct FFVkSPIRVCompiler *ctx, FFVulkanShader *shd, uint8_t **data, size_t *size, const char *entrypoint, void **opaque)
Definition: vulkan_spirv.h:28
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
AVERROR_EXTERNAL
#define AVERROR_EXTERNAL
Generic error in an external library.
Definition: error.h:59
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
FFVkExecContext
Definition: vulkan.h:111
ff_vk_shader_update_desc_buffer
int ff_vk_shader_update_desc_buffer(FFVulkanContext *s, FFVkExecContext *e, FFVulkanShader *shd, int set, int bind, int elem, FFVkBuffer *buf, VkDeviceSize offset, VkDeviceSize len, VkFormat fmt)
Update a descriptor in a buffer with a buffer.
Definition: vulkan.c:2813
DenoisePushData::ws_stride
uint32_t ws_stride[4]
Definition: vf_nlmeans_vulkan.c:481
FFVulkanDescriptorSetBinding::name
const char * name
Definition: vulkan.h:75
init_denoise_pipeline
static av_cold int init_denoise_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec, FFVulkanShader *shd, FFVkSPIRVCompiler *spv, const AVPixFmtDescriptor *desc, int planes)
Definition: vf_nlmeans_vulkan.c:487
IntegralPushData::xyoffs_start
uint32_t xyoffs_start
Definition: vf_nlmeans_vulkan.c:79
TYPE_SIZE
#define TYPE_SIZE
Definition: vf_nlmeans_vulkan.c:32
FFVkSPIRVCompiler
Definition: vulkan_spirv.h:26
layout
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel layout
Definition: filter_design.txt:18
nlmeans_vulkan_filter_frame
static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
Definition: vf_nlmeans_vulkan.c:794
uninit
static void uninit(AVBSFContext *ctx)
Definition: pcm_rechunk.c:68
ff_vk_exec_start
int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e)
Start/submit/wait an execution.
Definition: vulkan.c:559
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
DenoisePushData::nb_components
uint32_t nb_components
Definition: vf_nlmeans_vulkan.c:484
init_filter
static av_cold int init_filter(AVFilterContext *ctx)
Definition: vf_nlmeans_vulkan.c:606
TYPE_BLOCK_ELEMS
#define TYPE_BLOCK_ELEMS
Definition: vf_nlmeans_vulkan.c:33
weights
static const int weights[]
Definition: hevc_pel.c:32
ff_vk_frame_barrier
void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e, AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, VkAccessFlagBits new_access, VkImageLayout new_layout, uint32_t new_qf)
Definition: vulkan.c:2013
init_integral_pipeline
static av_cold int init_integral_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec, FFVulkanShader *shd_horizontal, FFVulkanShader *shd_vertical, FFVkSPIRVCompiler *spv, const AVPixFmtDescriptor *desc, int planes)
Definition: vf_nlmeans_vulkan.c:118
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
ff_vk_shader_link
int ff_vk_shader_link(FFVulkanContext *s, FFVulkanShader *shd, uint8_t *spirv, size_t spirv_len, const char *entrypoint)
Link a shader into an executable.
Definition: vulkan.c:2353
ff_vk_unmap_buffer
static int ff_vk_unmap_buffer(FFVulkanContext *s, FFVkBuffer *buf, int flush)
Definition: vulkan.h:541
vulkan_spirv.h
AVFilterPad::name
const char * name
Pad name.
Definition: filters.h:45
FFVkSPIRVCompiler::free_shader
void(* free_shader)(struct FFVkSPIRVCompiler *ctx, void **opaque)
Definition: vulkan_spirv.h:31
WeightsPushData::ws_count
uint32_t ws_count
Definition: vf_nlmeans_vulkan.c:303
ff_vk_exec_bind_shader
void ff_vk_exec_bind_shader(FFVulkanContext *s, FFVkExecContext *e, FFVulkanShader *shd)
Bind a shader.
Definition: vulkan.c:2889
WeightsPushData::patch_size
int32_t patch_size[4]
Definition: vf_nlmeans_vulkan.c:295
NLMeansVulkanContext::nb_offsets
int nb_offsets
Definition: vf_nlmeans_vulkan.c:56
TYPE_NAME
#define TYPE_NAME
Definition: vf_nlmeans_vulkan.c:30
ff_vk_create_imageviews
int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e, VkImageView views[AV_NUM_DATA_POINTERS], AVFrame *f, enum FFVkShaderRepFormat rep_fmt)
Create an imageview and add it as a dependency to an execution.
Definition: vulkan.c:1930
FFVulkanContext::vkfn
FFVulkanFunctions vkfn
Definition: vulkan.h:278
FFVkExecPool
Definition: vulkan.h:252
pos
unsigned int pos
Definition: spdifenc.c:414
ff_vk_shader_add_push_const
int ff_vk_shader_add_push_const(FFVulkanShader *shd, int offset, int size, VkShaderStageFlagBits stage)
Add/update push constants for execution.
Definition: vulkan.c:1459
OFFSET
#define OFFSET(x)
Definition: vf_nlmeans_vulkan.c:1174
ff_vk_qf_find
AVVulkanDeviceQueueFamily * ff_vk_qf_find(FFVulkanContext *s, VkQueueFlagBits dev_family, VkVideoCodecOperationFlagBitsKHR vid_ops)
Chooses an appropriate QF.
Definition: vulkan.c:274
FFVkExecContext::buf
VkCommandBuffer buf
Definition: vulkan.h:122
init_weights_pipeline
static av_cold int init_weights_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec, FFVulkanShader *shd, FFVkSPIRVCompiler *spv, const AVPixFmtDescriptor *desc, int planes)
Definition: vf_nlmeans_vulkan.c:307
NLMeansVulkanContext::nlmeans_opts::s
double s
Definition: vf_nlmeans_vulkan.c:62
FFVulkanContext::input_format
enum AVPixelFormat input_format
Definition: vulkan.h:320
NLMeansVulkanContext::nlmeans_opts::pc
int pc[4]
Definition: vf_nlmeans_vulkan.c:65
random_seed.h
buffer
the frame and frame reference mechanism is intended to as much as expensive copies of that data while still allowing the filters to produce correct results The data is stored in buffers represented by AVFrame structures Several references can point to the same frame buffer
Definition: filter_design.txt:49
GLSLF
#define GLSLF(N, S,...)
Definition: vulkan.h:53
AV_OPT_TYPE_INT
@ AV_OPT_TYPE_INT
Underlying C type is int.
Definition: opt.h:259
Windows::Graphics::DirectX::Direct3D11::p
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
Definition: vsrc_gfxcapture_winrt.hpp:53
AVFilterContext
An instance of a filter.
Definition: avfilter.h:274
IntegralPushData::nb_components
uint32_t nb_components
Definition: vf_nlmeans_vulkan.c:80
NLMeansVulkanContext::nlmeans_opts::sc
double sc[4]
Definition: vf_nlmeans_vulkan.c:63
desc
const char * desc
Definition: libsvtav1.c:79
ff_vk_filter_config_input
int ff_vk_filter_config_input(AVFilterLink *inlink)
Definition: vulkan_filter.c:176
AVMEDIA_TYPE_VIDEO
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:200
FFFilter::p
AVFilter p
The public AVFilter.
Definition: filters.h:270
mem.h
AVBufferRef
A reference to a data buffer.
Definition: buffer.h:82
TYPE_BLOCK_SIZE
#define TYPE_BLOCK_SIZE
Definition: vf_nlmeans_vulkan.c:34
NLMeansVulkanContext::e
FFVkExecPool e
Definition: vf_nlmeans_vulkan.c:41
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:69
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:35
DenoisePushData::ws_count
uint32_t ws_count
Definition: vf_nlmeans_vulkan.c:482
IntegralPushData::integral_size
uint64_t integral_size
Definition: vf_nlmeans_vulkan.c:77
IntegralPushData::strength
float strength[4]
Definition: vf_nlmeans_vulkan.c:73
TYPE_ELEMS
#define TYPE_ELEMS
Definition: vf_nlmeans_vulkan.c:31
FFVkBuffer
Definition: vulkan.h:87
int32_t
int32_t
Definition: audioconvert.c:56
ff_vk_exec_submit
int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e)
Definition: vulkan.c:904
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
AVVulkanDeviceQueueFamily
Definition: hwcontext_vulkan.h:33
width
#define width
Definition: dsp.h:89
NLMeansVulkanContext::nlmeans_opts::r
int r
Definition: vf_nlmeans_vulkan.c:61
FILTER_SINGLE_PIXFMT
#define FILTER_SINGLE_PIXFMT(pix_fmt_)
Definition: filters.h:253
FFVulkanFunctions
Definition: vulkan_functions.h:276
ff_vk_get_pooled_buffer
int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool, AVBufferRef **buf, VkBufferUsageFlags usage, void *create_pNext, size_t size, VkMemoryPropertyFlagBits mem_props)
Initialize a pool and create AVBufferRefs containing FFVkBuffer.
Definition: vulkan.c:1254
src
#define src
Definition: vp8dsp.c:248
WeightsPushData::comp_plane
uint32_t comp_plane[4]
Definition: vf_nlmeans_vulkan.c:298
NLMeansVulkanContext::vkctx
FFVulkanContext vkctx
Definition: vf_nlmeans_vulkan.c:38
WeightsPushData::width
uint32_t width[4]
Definition: vf_nlmeans_vulkan.c:291