FFmpeg
vf_nlmeans_vulkan.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) Lynne
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/mem.h"
22 #include "libavutil/opt.h"
23 #include "vulkan_filter.h"
24 
25 #include "filters.h"
26 #include "video.h"
27 
28 extern const unsigned char ff_nlmeans_horizontal_comp_spv_data[];
29 extern const unsigned int ff_nlmeans_horizontal_comp_spv_len;
30 extern const unsigned char ff_nlmeans_vertical_comp_spv_data[];
31 extern const unsigned int ff_nlmeans_vertical_comp_spv_len;
32 extern const unsigned char ff_nlmeans_weights_comp_spv_data[];
33 extern const unsigned int ff_nlmeans_weights_comp_spv_len;
34 extern const unsigned char ff_nlmeans_denoise_comp_spv_data[];
35 extern const unsigned int ff_nlmeans_denoise_comp_spv_len;
36 
37 /* Must be kept in sync with the definitions in the nlmeans_* shaders */
38 #define TYPE_ELEMS 4
39 #define TYPE_SIZE (TYPE_ELEMS*4)
40 #define WG_SIZE 32
41 
42 typedef struct NLMeansVulkanContext {
44 
48 
51 
53 
58 
59  int *xoffsets;
60  int *yoffsets;
62  float strength[4];
63  int patch[4];
64 
65  struct nlmeans_opts {
66  int r;
67  double s;
68  double sc[4];
69  int p;
70  int pc[4];
71  int t;
72  } opts;
74 
75 typedef struct IntegralPushData {
76  uint32_t width[4];
77  uint32_t height[4];
78  float strength[4];
79  uint32_t comp_off[4];
80  uint32_t comp_plane[4];
81  VkDeviceAddress integral_base;
82  uint64_t integral_size;
83  uint64_t int_stride;
84  uint32_t xyoffs_start;
85  uint32_t nb_components;
87 
89  FFVulkanShader *shd_horizontal,
90  FFVulkanShader *shd_vertical,
91  int planes)
92 {
93  int err;
94  FFVulkanShader *shd;
95 
96  /* Horizontal pass */
97  shd = shd_horizontal;
98  ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, NULL,
99  (uint32_t []) { WG_SIZE, 1, 1 }, 0);
100 
102  VK_SHADER_STAGE_COMPUTE_BIT);
103 
104  RET(ff_vk_shader_link(vkctx, shd,
107 
108  RET(ff_vk_shader_register_exec(vkctx, exec, shd));
109 
110  /* Vertical pass */
111  shd = shd_vertical;
112  ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, NULL,
113  (uint32_t []) { WG_SIZE, 1, 1 }, 0);
114 
116  VK_SHADER_STAGE_COMPUTE_BIT);
117 
118  const FFVulkanDescriptorSetBinding desc_set_img[] = {
119  { /* input_img */
120  .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
121  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
122  .elems = planes,
123  },
124  };
125  ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set_img, 1, 0, 0);
126 
127  const FFVulkanDescriptorSetBinding desc_set_xyoffsets[] = {
128  { /* xyoffsets_buffer */
129  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
130  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
131  },
132  };
133  ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set_xyoffsets, 1, 1, 0);
134 
135  RET(ff_vk_shader_link(vkctx, shd,
138 
139  RET(ff_vk_shader_register_exec(vkctx, exec, shd));
140 
141 fail:
142  return err;
143 }
144 
145 typedef struct WeightsPushData {
146  uint32_t width[4];
147  uint32_t height[4];
148  uint32_t ws_offset[4];
149  uint32_t ws_stride[4];
151  float strength[4];
152  uint32_t comp_off[4];
153  uint32_t comp_plane[4];
154  VkDeviceAddress integral_base;
155  uint64_t integral_size;
156  uint64_t int_stride;
157  uint32_t xyoffs_start;
158  uint32_t ws_count;
159  uint32_t nb_components;
161 
163  FFVulkanShader *shd, int planes)
164 {
165  int err;
166 
167  ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, NULL,
168  (uint32_t []) { WG_SIZE, WG_SIZE, 1 }, 0);
169 
171  VK_SHADER_STAGE_COMPUTE_BIT);
172 
173  const FFVulkanDescriptorSetBinding desc_set[] = {
174  { /* input_img */
175  .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
176  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
177  .elems = planes,
178  },
179  { /* weights_buffer */
180  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
181  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
182  },
183  { /* sums_buffer */
184  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
185  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
186  },
187  };
188  ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 3, 0, 0);
189 
190  const FFVulkanDescriptorSetBinding desc_set_xyoffsets[] = {
191  { /* xyoffsets_buffer */
192  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
193  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
194  },
195  };
196  ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set_xyoffsets, 1, 1, 0);
197 
198  RET(ff_vk_shader_link(vkctx, shd,
201 
202  RET(ff_vk_shader_register_exec(vkctx, exec, shd));
203 
204 fail:
205  return err;
206 }
207 
208 typedef struct DenoisePushData {
209  uint32_t comp_off[4];
210  uint32_t comp_plane[4];
211  uint32_t ws_offset[4];
212  uint32_t ws_stride[4];
213  uint32_t ws_count;
214  uint32_t t;
215  uint32_t nb_components;
217 
219  FFVulkanShader *shd, int planes)
220 {
221  int err;
222 
223  ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, NULL,
224  (uint32_t []) { WG_SIZE, WG_SIZE, 1 }, 0);
225 
227  VK_SHADER_STAGE_COMPUTE_BIT);
228 
229  const FFVulkanDescriptorSetBinding desc_set_img[] = {
230  { /* input_img */
231  .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
232  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
233  .elems = planes,
234  },
235  { /* output_img */
236  .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
237  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
238  .elems = planes,
239  },
240  };
241  ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set_img, 2, 0, 0);
242 
243  const FFVulkanDescriptorSetBinding desc_set_ws[] = {
244  { /* weights_buffer */
245  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
246  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
247  },
248  { /* sums_buffer */
249  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
250  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
251  },
252  };
253  ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set_ws, 2, 0, 0);
254 
255  RET(ff_vk_shader_link(vkctx, shd,
258 
259  RET(ff_vk_shader_register_exec(vkctx, exec, shd));
260 
261 fail:
262  return err;
263 }
264 
266 {
267  int rad, err;
268  int xcnt = 0, ycnt = 0;
269  NLMeansVulkanContext *s = ctx->priv;
270  FFVulkanContext *vkctx = &s->vkctx;
271  const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
272  int *offsets_buf;
273  int offsets_dispatched = 0, nb_dispatches = 0;
274 
275  if (!(s->opts.r & 1)) {
276  s->opts.r |= 1;
277  av_log(ctx, AV_LOG_WARNING, "Research size should be odd, setting to %i",
278  s->opts.r);
279  }
280 
281  if (!(s->opts.p & 1)) {
282  s->opts.p |= 1;
283  av_log(ctx, AV_LOG_WARNING, "Patch size should be odd, setting to %i",
284  s->opts.p);
285  }
286 
287  for (int i = 0; i < 4; i++) {
288  double str = !isnan(s->opts.sc[i]) ? s->opts.sc[i] : s->opts.s;
289  int ps = (s->opts.pc[i] ? s->opts.pc[i] : s->opts.p);
290  if (str == 0.0) {
291  s->strength[i] = 0.0;
292  } else {
293  str = 10.0f*str;
294  str *= -str;
295  str = 255.0*255.0 / str;
296  s->strength[i] = str;
297  }
298  if (!(ps & 1)) {
299  ps |= 1;
300  av_log(ctx, AV_LOG_WARNING, "Patch size should be odd, setting to %i",
301  ps);
302  }
303  s->patch[i] = ps / 2;
304  }
305 
306  rad = s->opts.r/2;
307  s->nb_offsets = (2*rad + 1)*(2*rad + 1) - 1;
308  s->xoffsets = av_malloc(s->nb_offsets*sizeof(*s->xoffsets));
309  s->yoffsets = av_malloc(s->nb_offsets*sizeof(*s->yoffsets));
310  s->nb_offsets = 0;
311 
312  for (int x = -rad; x <= rad; x++) {
313  for (int y = -rad; y <= rad; y++) {
314  if (!x && !y)
315  continue;
316 
317  s->xoffsets[xcnt++] = x;
318  s->yoffsets[ycnt++] = y;
319  s->nb_offsets++;
320  }
321  }
322 
323  RET(ff_vk_create_buf(&s->vkctx, &s->xyoffsets_buf, 2*s->nb_offsets*sizeof(int32_t), NULL, NULL,
324  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
325  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
326  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
327  RET(ff_vk_map_buffer(&s->vkctx, &s->xyoffsets_buf, (uint8_t **)&offsets_buf, 0));
328 
329  for (int i = 0; i < 2*s->nb_offsets; i += 2) {
330  offsets_buf[i + 0] = s->xoffsets[i >> 1];
331  offsets_buf[i + 1] = s->yoffsets[i >> 1];
332  }
333 
334  RET(ff_vk_unmap_buffer(&s->vkctx, &s->xyoffsets_buf, 1));
335 
336  s->opts.t = FFMIN(s->opts.t, (FFALIGN(s->nb_offsets, TYPE_ELEMS) / TYPE_ELEMS));
337 
338  s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0);
339  if (!s->qf) {
340  av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n");
341  err = AVERROR(ENOTSUP);
342  goto fail;
343  }
344 
345  RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, 1, 0, 0, 0, NULL));
346 
347  RET(init_integral_pipeline(vkctx, &s->e, &s->shd_horizontal, &s->shd_vertical,
348  planes));
349 
350  RET(init_weights_pipeline(vkctx, &s->e, &s->shd_weights, planes));
351 
352  RET(init_denoise_pipeline(vkctx, &s->e, &s->shd_denoise, planes));
353 
354  RET(ff_vk_shader_update_desc_buffer(vkctx, &s->e.contexts[0], &s->shd_vertical,
355  1, 0, 0,
356  &s->xyoffsets_buf, 0, s->xyoffsets_buf.size,
357  VK_FORMAT_UNDEFINED));
358 
359  RET(ff_vk_shader_update_desc_buffer(vkctx, &s->e.contexts[0], &s->shd_weights,
360  1, 0, 0,
361  &s->xyoffsets_buf, 0, s->xyoffsets_buf.size,
362  VK_FORMAT_UNDEFINED));
363 
364  do {
365  int wg_invoc = FFMIN((s->nb_offsets - offsets_dispatched)/TYPE_ELEMS, s->opts.t);
366  offsets_dispatched += wg_invoc * TYPE_ELEMS;
367  nb_dispatches++;
368  } while (offsets_dispatched < s->nb_offsets);
369 
370  av_log(ctx, AV_LOG_VERBOSE, "Filter initialized, %i x/y offsets, %i dispatches\n",
371  s->nb_offsets, nb_dispatches);
372 
373  s->initialized = 1;
374 
375 fail:
376  return err;
377 }
378 
380  FFVkBuffer *ws_vk, uint32_t comp_offs[4], uint32_t comp_planes[4],
381  uint32_t ws_offset[4], uint32_t ws_stride[4],
382  uint32_t ws_count, uint32_t t, uint32_t nb_components)
383 {
384  FFVulkanContext *vkctx = &s->vkctx;
385  FFVulkanFunctions *vk = &vkctx->vkfn;
386 
387  DenoisePushData pd = {
388  { comp_offs[0], comp_offs[1], comp_offs[2], comp_offs[3] },
389  { comp_planes[0], comp_planes[1], comp_planes[2], comp_planes[3] },
390  { ws_offset[0], ws_offset[1], ws_offset[2], ws_offset[3] },
391  { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] },
392  ws_count,
393  t,
394  nb_components,
395  };
396 
397  /* Denoise pass pipeline */
398  ff_vk_exec_bind_shader(vkctx, exec, &s->shd_denoise);
399 
400  /* Push data */
401  ff_vk_shader_update_push_const(vkctx, exec, &s->shd_denoise,
402  VK_SHADER_STAGE_COMPUTE_BIT,
403  0, sizeof(pd), &pd);
404 
405  VkBufferMemoryBarrier2 buf_bar;
406  ff_vk_buf_barrier(buf_bar, ws_vk,
407  COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT,
408  SHADER_STORAGE_WRITE_BIT,
409  COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT, NONE_KHR,
410  0, VK_WHOLE_SIZE);
411  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
412  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
413  .pBufferMemoryBarriers = &buf_bar,
414  .bufferMemoryBarrierCount = 1,
415  });
416 
417  /* End of denoise pass */
418  vk->CmdDispatch(exec->buf,
419  FFALIGN(vkctx->output_width, s->shd_denoise.lg_size[0])/s->shd_denoise.lg_size[0],
420  FFALIGN(vkctx->output_height, s->shd_denoise.lg_size[1])/s->shd_denoise.lg_size[1],
421  av_pix_fmt_count_planes(s->vkctx.output_format));
422 
423  return 0;
424 }
425 
427 {
428  int err;
429  AVFrame *out = NULL;
430  AVFilterContext *ctx = link->dst;
431  NLMeansVulkanContext *s = ctx->priv;
432  AVFilterLink *outlink = ctx->outputs[0];
433  FFVulkanContext *vkctx = &s->vkctx;
434  FFVulkanFunctions *vk = &vkctx->vkfn;
435 
436  const AVPixFmtDescriptor *desc;
437  int comp_offs[4];
438  int comp_planes[4];
439  int plane_widths[4];
440  int plane_heights[4];
441 
442  int offsets_dispatched = 0;
443 
444  /* Integral */
445  AVBufferRef *integral_buf = NULL;
446  FFVkBuffer *integral_vk;
447  size_t int_stride;
448  size_t int_size;
449 
450  /* Weights/sums */
451  AVBufferRef *ws_buf = NULL;
452  FFVkBuffer *ws_vk;
453  uint32_t ws_count = 0;
454  uint32_t ws_offset[4];
455  uint32_t ws_stride[4];
456  size_t ws_size;
457 
458  FFVkExecContext *exec;
459  VkImageView in_views[AV_NUM_DATA_POINTERS];
460  VkImageView out_views[AV_NUM_DATA_POINTERS];
461  VkImageMemoryBarrier2 img_bar[8];
462  int nb_img_bar = 0;
463  VkBufferMemoryBarrier2 buf_bar[2];
464  int nb_buf_bar = 0;
465 
466  if (!s->initialized)
467  RET(init_filter(ctx));
468 
470  if (!desc)
471  return AVERROR(EINVAL);
472 
473  /* Integral image */
474  int_stride = FFALIGN(vkctx->output_width, s->shd_vertical.lg_size[0]) * TYPE_SIZE;
475  int_size = FFALIGN(vkctx->output_height, s->shd_horizontal.lg_size[0]) * int_stride;
476 
477  /* Plane dimensions */
478  for (int i = 0; i < desc->nb_components; i++) {
479  plane_widths[i] = !i || (i == 3) ? vkctx->output_width : AV_CEIL_RSHIFT(vkctx->output_width, desc->log2_chroma_w);
480  plane_heights[i] = !i || (i == 3) ? vkctx->output_height : AV_CEIL_RSHIFT(vkctx->output_height, desc->log2_chroma_h);
481  plane_widths[i] = FFALIGN(plane_widths[i], s->shd_denoise.lg_size[0]);
482  plane_heights[i] = FFALIGN(plane_heights[i], s->shd_denoise.lg_size[1]);
483 
484  comp_offs[i] = desc->comp[i].offset / (FFALIGN(desc->comp[i].depth, 8)/8);
485  comp_planes[i] = desc->comp[i].plane;
486 
487  ws_stride[i] = plane_widths[i];
488  ws_offset[i] = ws_count;
489  ws_count += ws_stride[i] * plane_heights[i];
490  }
491 
492  ws_size = ws_count * sizeof(float);
493 
494  /* Buffers */
495  err = ff_vk_get_pooled_buffer(&s->vkctx, &s->integral_buf_pool, &integral_buf,
496  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
497  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
498  NULL,
499  int_size * s->opts.t * desc->nb_components,
500  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
501  if (err < 0)
502  return err;
503  integral_vk = (FFVkBuffer *)integral_buf->data;
504 
505  err = ff_vk_get_pooled_buffer(&s->vkctx, &s->ws_buf_pool, &ws_buf,
506  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
507  VK_BUFFER_USAGE_TRANSFER_DST_BIT,
508  NULL,
509  ws_size * s-> opts.t * 2,
510  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
511  if (err < 0)
512  return err;
513  ws_vk = (FFVkBuffer *)ws_buf->data;
514 
515  /* Output frame */
516  out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
517  if (!out) {
518  err = AVERROR(ENOMEM);
519  goto fail;
520  }
521 
522  /* Execution context */
523  exec = ff_vk_exec_get(&s->vkctx, &s->e);
524  ff_vk_exec_start(vkctx, exec);
525 
526  /* Dependencies */
527  RET(ff_vk_exec_add_dep_frame(vkctx, exec, in,
528  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
529  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
530  RET(ff_vk_exec_add_dep_frame(vkctx, exec, out,
531  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
532  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
533 
534  RET(ff_vk_exec_add_dep_buf(vkctx, exec, &integral_buf, 1, 0));
535  integral_buf = NULL;
536 
537  RET(ff_vk_exec_add_dep_buf(vkctx, exec, &ws_buf, 1, 0));
538  ws_buf = NULL;
539 
540  /* Input frame prep */
541  RET(ff_vk_create_imageviews(vkctx, exec, in_views, in, FF_VK_REP_FLOAT));
542  ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar,
543  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
544  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
545  VK_ACCESS_SHADER_READ_BIT,
546  VK_IMAGE_LAYOUT_GENERAL,
547  VK_QUEUE_FAMILY_IGNORED);
548 
549  /* Output frame prep */
550  RET(ff_vk_create_imageviews(vkctx, exec, out_views, out, FF_VK_REP_FLOAT));
551  ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar,
552  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
553  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
554  VK_ACCESS_SHADER_WRITE_BIT,
555  VK_IMAGE_LAYOUT_GENERAL,
556  VK_QUEUE_FAMILY_IGNORED);
557 
558  ff_vk_buf_barrier(buf_bar[nb_buf_bar++], ws_vk,
559  ALL_COMMANDS_BIT, NONE_KHR, NONE_KHR,
560  TRANSFER_BIT, TRANSFER_WRITE_BIT, NONE_KHR,
561  0, VK_WHOLE_SIZE);
562  ff_vk_buf_barrier(buf_bar[nb_buf_bar++], integral_vk,
563  ALL_COMMANDS_BIT, NONE_KHR, NONE_KHR,
564  COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT, NONE_KHR,
565  0, VK_WHOLE_SIZE);
566  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
567  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
568  .pImageMemoryBarriers = img_bar,
569  .imageMemoryBarrierCount = nb_img_bar,
570  .pBufferMemoryBarriers = buf_bar,
571  .bufferMemoryBarrierCount = nb_buf_bar,
572  });
573  nb_buf_bar = 0;
574  nb_img_bar = 0;
575 
576  /* Buffer zeroing */
577  vk->CmdFillBuffer(exec->buf, ws_vk->buf, 0, ws_vk->size, 0x0);
578 
579  /* Update integral descriptors */
580  ff_vk_shader_update_img_array(vkctx, exec, &s->shd_vertical, in, in_views, 0, 0,
581  VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE);
582  /* Update weights descriptors */
583  ff_vk_shader_update_img_array(vkctx, exec, &s->shd_weights, in, in_views, 0, 0,
584  VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE);
585  RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_weights, 0, 1, 0,
586  ws_vk, 0, ws_size * s-> opts.t,
587  VK_FORMAT_UNDEFINED));
588  RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_weights, 0, 2, 0,
589  ws_vk, ws_size * s-> opts.t, ws_size * s-> opts.t,
590  VK_FORMAT_UNDEFINED));
591 
592  /* Update denoise descriptors */
593  ff_vk_shader_update_img_array(vkctx, exec, &s->shd_denoise, in, in_views, 0, 0,
594  VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE);
595  ff_vk_shader_update_img_array(vkctx, exec, &s->shd_denoise, out, out_views, 0, 1,
596  VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE);
597  RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_denoise, 1, 0, 0,
598  ws_vk, 0, ws_size * s-> opts.t,
599  VK_FORMAT_UNDEFINED));
600  RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_denoise, 1, 1, 0,
601  ws_vk, ws_size * s-> opts.t, ws_size * s-> opts.t,
602  VK_FORMAT_UNDEFINED));
603 
604  VkPipelineStageFlagBits2 ws_stage = VK_PIPELINE_STAGE_2_TRANSFER_BIT;
605  VkAccessFlagBits2 ws_access = VK_ACCESS_2_TRANSFER_WRITE_BIT;
606  do {
607  int wg_invoc = FFMIN((s->nb_offsets - offsets_dispatched)/TYPE_ELEMS, s->opts.t);
608  IntegralPushData pd = {
609  { plane_widths[0], plane_widths[1], plane_widths[2], plane_widths[3] },
610  { plane_heights[0], plane_heights[1], plane_heights[2], plane_heights[3] },
611  { s->strength[0], s->strength[1], s->strength[2], s->strength[3], },
612  { comp_offs[0], comp_offs[1], comp_offs[2], comp_offs[3] },
613  { comp_planes[0], comp_planes[1], comp_planes[2], comp_planes[3] },
614  integral_vk->address,
615  (uint64_t)int_size,
616  (uint64_t)int_stride,
617  offsets_dispatched,
618  desc->nb_components,
619  };
620 
621  /* Vertical pass */
622  ff_vk_buf_barrier(buf_bar[nb_buf_bar++], integral_vk,
623  COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT, NONE_KHR,
624  COMPUTE_SHADER_BIT, SHADER_STORAGE_WRITE_BIT, NONE_KHR,
625  0, VK_WHOLE_SIZE);
626  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
627  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
628  .pBufferMemoryBarriers = buf_bar,
629  .bufferMemoryBarrierCount = nb_buf_bar,
630  });
631  nb_buf_bar = 0;
632 
633  ff_vk_exec_bind_shader(vkctx, exec, &s->shd_vertical);
634  ff_vk_shader_update_push_const(vkctx, exec, &s->shd_vertical,
635  VK_SHADER_STAGE_COMPUTE_BIT,
636  0, sizeof(pd), &pd);
637  vk->CmdDispatch(exec->buf,
638  FFALIGN(vkctx->output_width, s->shd_vertical.lg_size[0]) /
639  s->shd_vertical.lg_size[0],
640  desc->nb_components,
641  wg_invoc);
642 
643  /* Horizontal pass */
644  ff_vk_buf_barrier(buf_bar[nb_buf_bar++], integral_vk,
645  COMPUTE_SHADER_BIT, SHADER_STORAGE_WRITE_BIT, NONE_KHR,
646  COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT,
647  SHADER_STORAGE_WRITE_BIT,
648  0, VK_WHOLE_SIZE);
649  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
650  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
651  .pBufferMemoryBarriers = buf_bar,
652  .bufferMemoryBarrierCount = nb_buf_bar,
653  });
654  nb_buf_bar = 0;
655 
656  ff_vk_exec_bind_shader(vkctx, exec, &s->shd_horizontal);
657  ff_vk_shader_update_push_const(vkctx, exec, &s->shd_horizontal,
658  VK_SHADER_STAGE_COMPUTE_BIT,
659  0, sizeof(pd), &pd);
660  vk->CmdDispatch(exec->buf,
661  FFALIGN(vkctx->output_height, s->shd_horizontal.lg_size[0]) /
662  s->shd_horizontal.lg_size[0],
663  desc->nb_components,
664  wg_invoc);
665 
666  /* Weights pass */
667  ff_vk_buf_barrier(buf_bar[nb_buf_bar++], integral_vk,
668  COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT,
669  SHADER_STORAGE_WRITE_BIT,
670  COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT, NONE_KHR,
671  0, VK_WHOLE_SIZE);
672  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
673  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
674  .srcStageMask = ws_stage,
675  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
676  .srcAccessMask = ws_access,
677  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
678  VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
679  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
680  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
681  .buffer = ws_vk->buf,
682  .size = ws_vk->size,
683  .offset = 0,
684  };
685  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
686  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
687  .pBufferMemoryBarriers = buf_bar,
688  .bufferMemoryBarrierCount = nb_buf_bar,
689  });
690  nb_buf_bar = 0;
691  ws_stage = buf_bar[1].dstStageMask;
692  ws_access = buf_bar[1].dstAccessMask;
693 
694  WeightsPushData wpd = {
695  { plane_widths[0], plane_widths[1], plane_widths[2], plane_widths[3] },
696  { plane_heights[0], plane_heights[1], plane_heights[2], plane_heights[3] },
697  { ws_offset[0], ws_offset[1], ws_offset[2], ws_offset[3] },
698  { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] },
699  { s->patch[0], s->patch[1], s->patch[2], s->patch[3] },
700  { s->strength[0], s->strength[1], s->strength[2], s->strength[3], },
701  { comp_offs[0], comp_offs[1], comp_offs[2], comp_offs[3] },
702  { comp_planes[0], comp_planes[1], comp_planes[2], comp_planes[3] },
703  integral_vk->address,
704  (uint64_t)int_size,
705  (uint64_t)int_stride,
706  offsets_dispatched,
707  ws_count,
708  desc->nb_components,
709  };
710  ff_vk_exec_bind_shader(vkctx, exec, &s->shd_weights);
711  ff_vk_shader_update_push_const(vkctx, exec, &s->shd_weights,
712  VK_SHADER_STAGE_COMPUTE_BIT,
713  0, sizeof(wpd), &wpd);
714  vk->CmdDispatch(exec->buf,
715  FFALIGN(vkctx->output_width, s->shd_weights.lg_size[0]) /
716  s->shd_weights.lg_size[0],
717  FFALIGN(vkctx->output_height, s->shd_weights.lg_size[1]) /
718  s->shd_weights.lg_size[1],
719  wg_invoc * desc->nb_components);
720 
721  offsets_dispatched += wg_invoc * TYPE_ELEMS;
722  } while (offsets_dispatched < s->nb_offsets);
723 
724  RET(denoise_pass(s, exec, ws_vk, comp_offs, comp_planes, ws_offset, ws_stride,
725  ws_count, s->opts.t, desc->nb_components));
726 
727  err = ff_vk_exec_submit(vkctx, exec);
728  if (err < 0)
729  return err;
730 
731  err = av_frame_copy_props(out, in);
732  if (err < 0)
733  goto fail;
734 
735  av_frame_free(&in);
736 
737  return ff_filter_frame(outlink, out);
738 
739 fail:
740  av_buffer_unref(&integral_buf);
741  av_buffer_unref(&ws_buf);
742  av_frame_free(&in);
743  av_frame_free(&out);
744  return err;
745 }
746 
748 {
749  NLMeansVulkanContext *s = avctx->priv;
750  FFVulkanContext *vkctx = &s->vkctx;
751 
752  ff_vk_exec_pool_free(vkctx, &s->e);
753  ff_vk_shader_free(vkctx, &s->shd_horizontal);
754  ff_vk_shader_free(vkctx, &s->shd_vertical);
755  ff_vk_shader_free(vkctx, &s->shd_weights);
756  ff_vk_shader_free(vkctx, &s->shd_denoise);
757 
758  av_buffer_pool_uninit(&s->integral_buf_pool);
759  av_buffer_pool_uninit(&s->ws_buf_pool);
760 
761  ff_vk_uninit(&s->vkctx);
762 
763  av_freep(&s->xoffsets);
764  av_freep(&s->yoffsets);
765 
766  s->initialized = 0;
767 }
768 
769 #define OFFSET(x) offsetof(NLMeansVulkanContext, x)
770 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
772  { "s", "denoising strength for all components", OFFSET(opts.s), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 0.0, 100.0, FLAGS },
773  { "p", "patch size for all components", OFFSET(opts.p), AV_OPT_TYPE_INT, { .i64 = 3*2+1 }, 0, 99, FLAGS },
774  { "r", "research window size", OFFSET(opts.r), AV_OPT_TYPE_INT, { .i64 = 7*2+1 }, 0, 99, FLAGS },
775  { "t", "parallelism", OFFSET(opts.t), AV_OPT_TYPE_INT, { .i64 = 8 }, 1, 64, FLAGS },
776 
777  { "s1", "denoising strength for component 1", OFFSET(opts.sc[0]), AV_OPT_TYPE_DOUBLE, { .dbl = NAN }, 0.0, 100.0, FLAGS },
778  { "s2", "denoising strength for component 2", OFFSET(opts.sc[1]), AV_OPT_TYPE_DOUBLE, { .dbl = NAN }, 0.0, 100.0, FLAGS },
779  { "s3", "denoising strength for component 3", OFFSET(opts.sc[2]), AV_OPT_TYPE_DOUBLE, { .dbl = NAN }, 0.0, 100.0, FLAGS },
780  { "s4", "denoising strength for component 4", OFFSET(opts.sc[3]), AV_OPT_TYPE_DOUBLE, { .dbl = NAN }, 0.0, 100.0, FLAGS },
781 
782  { "p1", "patch size for component 1", OFFSET(opts.pc[0]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
783  { "p2", "patch size for component 2", OFFSET(opts.pc[1]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
784  { "p3", "patch size for component 3", OFFSET(opts.pc[2]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
785  { "p4", "patch size for component 4", OFFSET(opts.pc[3]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
786 
787  { NULL }
788 };
789 
790 AVFILTER_DEFINE_CLASS(nlmeans_vulkan);
791 
793  {
794  .name = "default",
795  .type = AVMEDIA_TYPE_VIDEO,
796  .filter_frame = &nlmeans_vulkan_filter_frame,
797  .config_props = &ff_vk_filter_config_input,
798  },
799 };
800 
802  {
803  .name = "default",
804  .type = AVMEDIA_TYPE_VIDEO,
805  .config_props = &ff_vk_filter_config_output,
806  },
807 };
808 
810  .p.name = "nlmeans_vulkan",
811  .p.description = NULL_IF_CONFIG_SMALL("Non-local means denoiser (Vulkan)"),
812  .p.priv_class = &nlmeans_vulkan_class,
813  .p.flags = AVFILTER_FLAG_HWDEVICE,
814  .priv_size = sizeof(NLMeansVulkanContext),
820  .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
821 };
ff_get_video_buffer
AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)
Request a picture buffer with a specific set of permissions.
Definition: video.c:89
NLMeansVulkanContext::nlmeans_opts::p
int p
Definition: vf_nlmeans_vulkan.c:69
DenoisePushData::comp_plane
uint32_t comp_plane[4]
Definition: vf_nlmeans_vulkan.c:210
ff_vk_create_buf
int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext, void *alloc_pNext, VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
Definition: vulkan.c:1050
AV_LOG_WARNING
#define AV_LOG_WARNING
Something somehow does not look correct.
Definition: log.h:216
WeightsPushData::int_stride
uint64_t int_stride
Definition: vf_nlmeans_vulkan.c:156
AVFILTER_DEFINE_CLASS
AVFILTER_DEFINE_CLASS(nlmeans_vulkan)
FFVulkanContext::output_height
int output_height
Definition: vulkan.h:361
AVERROR
Filter: the word “frame” indicates either a video frame or a group of audio samples, as stored in an AVFrame structure. Format negotiation: for each input and each output, the list of supported formats. For video that means pixel format. For audio that means channel layout, sample format and sample rate. The lists are not just lists, they are references to shared objects. When the negotiation mechanism computes the intersection of the formats supported at each end of a link, all references to both lists are replaced with a reference to the intersection. And when a single format is eventually chosen for a link amongst the remaining list, all references to the list are updated. That means that if a filter requires that its input and output have the same format amongst a supported list, all it has to do is use a reference to the same list of formats. query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
ff_vf_nlmeans_vulkan
const FFFilter ff_vf_nlmeans_vulkan
Definition: vf_nlmeans_vulkan.c:809
opt.h
NLMeansVulkanContext::opts
struct NLMeansVulkanContext::nlmeans_opts opts
ff_vk_shader_free
void ff_vk_shader_free(FFVulkanContext *s, FFVulkanShader *shd)
Free a shader.
Definition: vulkan.c:2845
out
static FILE * out
Definition: movenc.c:55
NLMeansVulkanContext::shd_weights
FFVulkanShader shd_weights
Definition: vf_nlmeans_vulkan.c:56
AVBufferPool
The buffer pool.
Definition: buffer_internal.h:88
WeightsPushData::strength
float strength[4]
Definition: vf_nlmeans_vulkan.c:151
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1068
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:3456
AVBufferRef::data
uint8_t * data
The data buffer.
Definition: buffer.h:90
RET
#define RET(x)
Definition: vulkan.h:68
ff_vk_exec_pool_init
int ff_vk_exec_pool_init(FFVulkanContext *s, AVVulkanDeviceQueueFamily *qf, FFVkExecPool *pool, int nb_contexts, int nb_queries, VkQueryType query_type, int query_64bit, const void *query_create_pnext)
Allocates/frees an execution pool.
Definition: vulkan.c:357
WeightsPushData
Definition: vf_nlmeans_vulkan.c:145
FF_FILTER_FLAG_HWFRAME_AWARE
#define FF_FILTER_FLAG_HWFRAME_AWARE
The filter is aware of hardware frames, and any hardware frame context should not be automatically propagated through it.
Definition: filters.h:208
IntegralPushData::height
uint32_t height[4]
Definition: vf_nlmeans_vulkan.c:77
av_frame_free
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g. extended_data.
Definition: frame.c:64
NLMeansVulkanContext::integral_buf_pool
AVBufferPool * integral_buf_pool
Definition: vf_nlmeans_vulkan.c:49
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:459
ff_vk_filter_init
int ff_vk_filter_init(AVFilterContext *avctx)
General lavfi IO functions.
Definition: vulkan_filter.c:233
ff_vk_map_buffer
static int ff_vk_map_buffer(FFVulkanContext *s, FFVkBuffer *buf, uint8_t **mem, int invalidate)
Definition: vulkan.h:603
NLMeansVulkanContext::xoffsets
int * xoffsets
Definition: vf_nlmeans_vulkan.c:59
AVOption
AVOption.
Definition: opt.h:429
FILTER_SINGLE_PIXFMT
#define FILTER_SINGLE_PIXFMT(pix_fmt_)
Definition: filters.h:254
filters.h
AV_LOG_VERBOSE
#define AV_LOG_VERBOSE
Detailed information.
Definition: log.h:226
NLMeansVulkanContext::shd_horizontal
FFVulkanShader shd_horizontal
Definition: vf_nlmeans_vulkan.c:54
FFVkBuffer::address
VkDeviceAddress address
Definition: vulkan.h:130
NLMeansVulkanContext::shd_vertical
FFVulkanShader shd_vertical
Definition: vf_nlmeans_vulkan.c:55
ff_vk_exec_get
FFVkExecContext * ff_vk_exec_get(FFVulkanContext *s, FFVkExecPool *pool)
Retrieve an execution pool.
Definition: vulkan.c:568
NLMeansVulkanContext::strength
float strength[4]
Definition: vf_nlmeans_vulkan.c:62
ff_vk_uninit
void ff_vk_uninit(FFVulkanContext *s)
Frees main context.
Definition: vulkan.c:2871
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:220
NLMeansVulkanContext::initialized
int initialized
Definition: vf_nlmeans_vulkan.c:45
IntegralPushData::int_stride
uint64_t int_stride
Definition: vf_nlmeans_vulkan.c:83
DenoisePushData::comp_off
uint32_t comp_off[4]
Definition: vf_nlmeans_vulkan.c:209
WeightsPushData::ws_offset
uint32_t ws_offset[4]
Definition: vf_nlmeans_vulkan.c:148
video.h
ff_vk_exec_bind_shader
void ff_vk_exec_bind_shader(FFVulkanContext *s, FFVkExecContext *e, const FFVulkanShader *shd)
Bind a shader.
Definition: vulkan.c:2822
IntegralPushData::width
uint32_t width[4]
Definition: vf_nlmeans_vulkan.c:76
AV_PIX_FMT_VULKAN
@ AV_PIX_FMT_VULKAN
Vulkan hardware images.
Definition: pixfmt.h:379
ff_vk_exec_add_dep_frame
int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, VkPipelineStageFlagBits2 wait_stage, VkPipelineStageFlagBits2 signal_stage)
Definition: vulkan.c:800
FFVkBuffer::buf
VkBuffer buf
Definition: vulkan.h:126
NLMeansVulkanContext::yoffsets
int * yoffsets
Definition: vf_nlmeans_vulkan.c:60
av_pix_fmt_count_planes
int av_pix_fmt_count_planes(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:3496
AVFilterContext::priv
void * priv
private data for use by the filter
Definition: avfilter.h:289
init_integral_pipeline
static av_cold int init_integral_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec, FFVulkanShader *shd_horizontal, FFVulkanShader *shd_vertical, int planes)
Definition: vf_nlmeans_vulkan.c:88
fail
#define fail()
Definition: checkasm.h:225
vulkan_filter.h
ff_nlmeans_vertical_comp_spv_len
const unsigned int ff_nlmeans_vertical_comp_spv_len
ff_vk_shader_update_img_array
void ff_vk_shader_update_img_array(FFVulkanContext *s, FFVkExecContext *e, FFVulkanShader *shd, AVFrame *f, VkImageView *views, int set, int binding, VkImageLayout layout, VkSampler sampler)
Update a descriptor in a buffer with an image array.
Definition: vulkan.c:2773
ff_vk_frame_barrier
void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e, AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar, VkPipelineStageFlags2 src_stage, VkPipelineStageFlags2 dst_stage, VkAccessFlagBits2 new_access, VkImageLayout new_layout, uint32_t new_qf)
Definition: vulkan.c:2085
ff_vk_shader_register_exec
int ff_vk_shader_register_exec(FFVulkanContext *s, FFVkExecPool *pool, FFVulkanShader *shd)
Register a shader with an exec pool.
Definition: vulkan.c:2638
NLMeansVulkanContext::shd_denoise
FFVulkanShader shd_denoise
Definition: vf_nlmeans_vulkan.c:57
DenoisePushData::ws_offset
uint32_t ws_offset[4]
Definition: vf_nlmeans_vulkan.c:211
AVFilterPad
A filter pad used for either input or output.
Definition: filters.h:40
FFVulkanDescriptorSetBinding::type
VkDescriptorType type
Definition: vulkan.h:114
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:210
av_cold
#define av_cold
Definition: attributes.h:119
WeightsPushData::integral_size
uint64_t integral_size
Definition: vf_nlmeans_vulkan.c:155
FFFilter
Definition: filters.h:267
float
float
Definition: af_crystalizer.c:122
FFVulkanContext::output_width
int output_width
Definition: vulkan.h:360
NLMeansVulkanContext::ws_buf_pool
AVBufferPool * ws_buf_pool
Definition: vf_nlmeans_vulkan.c:50
s
#define s(width, name)
Definition: cbs_vp9.c:198
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: filters.h:265
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:60
WeightsPushData::xyoffs_start
uint32_t xyoffs_start
Definition: vf_nlmeans_vulkan.c:157
AV_OPT_TYPE_DOUBLE
@ AV_OPT_TYPE_DOUBLE
Underlying C type is double.
Definition: opt.h:267
FLAGS
#define FLAGS
Definition: vf_nlmeans_vulkan.c:770
FF_VK_REP_FLOAT
@ FF_VK_REP_FLOAT
Definition: vulkan.h:451
ctx
static AVFormatContext * ctx
Definition: movenc.c:49
ff_vk_exec_add_dep_buf
int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef **deps, int nb_deps, int ref)
Execution dependency management.
Definition: vulkan.c:640
denoise_pass
static int denoise_pass(NLMeansVulkanContext *s, FFVkExecContext *exec, FFVkBuffer *ws_vk, uint32_t comp_offs[4], uint32_t comp_planes[4], uint32_t ws_offset[4], uint32_t ws_stride[4], uint32_t ws_count, uint32_t t, uint32_t nb_components)
Definition: vf_nlmeans_vulkan.c:379
ff_vk_exec_pool_free
void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool)
Definition: vulkan.c:299
NAN
#define NAN
Definition: mathematics.h:115
link
Filter: the word “frame” indicates either a video frame or a group of audio samples, as stored in an AVFrame structure. Format: for each input and each output, the list of supported formats. For video that means pixel format. For audio that means channel layout, sample format and sample rate. The lists are not just lists, they are references to shared objects. When the negotiation mechanism computes the intersection of the formats supported at each end of a link, …
Definition: filter_design.txt:23
WeightsPushData::nb_components
uint32_t nb_components
Definition: vf_nlmeans_vulkan.c:159
WeightsPushData::integral_base
VkDeviceAddress integral_base
Definition: vf_nlmeans_vulkan.c:154
IntegralPushData::integral_base
VkDeviceAddress integral_base
Definition: vf_nlmeans_vulkan.c:81
opts
static AVDictionary * opts
Definition: movenc.c:51
NULL
#define NULL
Definition: coverity.c:32
av_frame_copy_props
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:599
av_buffer_unref
void av_buffer_unref(AVBufferRef **buf)
Free a given reference and automatically free the buffer if there are no more references to it.
Definition: buffer.c:139
WeightsPushData::height
uint32_t height[4]
Definition: vf_nlmeans_vulkan.c:147
nlmeans_vulkan_options
static const AVOption nlmeans_vulkan_options[]
Definition: vf_nlmeans_vulkan.c:771
ff_nlmeans_vertical_comp_spv_data
const unsigned char ff_nlmeans_vertical_comp_spv_data[]
isnan
#define isnan(x)
Definition: libm.h:342
ff_nlmeans_denoise_comp_spv_len
const unsigned int ff_nlmeans_denoise_comp_spv_len
av_buffer_pool_uninit
void av_buffer_pool_uninit(AVBufferPool **ppool)
Mark the pool as being available for freeing.
Definition: buffer.c:328
ff_vk_filter_config_output
int ff_vk_filter_config_output(AVFilterLink *outlink)
Definition: vulkan_filter.c:209
ff_vk_shader_link
int ff_vk_shader_link(FFVulkanContext *s, FFVulkanShader *shd, const char *spirv, size_t spirv_len, const char *entrypoint)
Link a shader into an executable.
Definition: vulkan.c:2411
FFVkBuffer::size
size_t size
Definition: vulkan.h:129
IntegralPushData::comp_off
uint32_t comp_off[4]
Definition: vf_nlmeans_vulkan.c:79
NLMeansVulkanContext::nlmeans_opts
Definition: vf_nlmeans_vulkan.c:65
FFVulkanContext
Definition: vulkan.h:312
WeightsPushData::comp_off
uint32_t comp_off[4]
Definition: vf_nlmeans_vulkan.c:152
WeightsPushData::ws_stride
uint32_t ws_stride[4]
Definition: vf_nlmeans_vulkan.c:149
nlmeans_vulkan_inputs
static const AVFilterPad nlmeans_vulkan_inputs[]
Definition: vf_nlmeans_vulkan.c:792
WG_SIZE
#define WG_SIZE
Definition: vf_nlmeans_vulkan.c:40
NLMeansVulkanContext::patch
int patch[4]
Definition: vf_nlmeans_vulkan.c:63
DenoisePushData
Definition: vf_nlmeans_vulkan.c:208
NLMeansVulkanContext::qf
AVVulkanDeviceQueueFamily * qf
Definition: vf_nlmeans_vulkan.c:47
ff_vk_buf_barrier
#define ff_vk_buf_barrier(dst, vkb, s_stage, s_access, s_access2, d_stage, d_access, d_access2, offs, bsz)
Definition: vulkan.h:551
init
int(* init)(AVBSFContext *ctx)
Definition: dts2pts.c:551
NLMeansVulkanContext
Definition: vf_nlmeans_vulkan.c:42
ff_vk_shader_update_push_const
void ff_vk_shader_update_push_const(FFVulkanContext *s, FFVkExecContext *e, FFVulkanShader *shd, VkShaderStageFlagBits stage, int offset, size_t size, void *src)
Update push constant in a shader.
Definition: vulkan.c:2812
NLMeansVulkanContext::xyoffsets_buf
FFVkBuffer xyoffsets_buf
Definition: vf_nlmeans_vulkan.c:52
FFVulkanDescriptorSetBinding
Definition: vulkan.h:112
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:94
IntegralPushData::comp_plane
uint32_t comp_plane[4]
Definition: vf_nlmeans_vulkan.c:80
IntegralPushData
Definition: vf_nlmeans_vulkan.c:75
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
AVFILTER_FLAG_HWDEVICE
#define AVFILTER_FLAG_HWDEVICE
The filter can create hardware frames using AVFilterContext.hw_device_ctx.
Definition: avfilter.h:188
nlmeans_vulkan_outputs
static const AVFilterPad nlmeans_vulkan_outputs[]
Definition: vf_nlmeans_vulkan.c:801
NLMeansVulkanContext::nlmeans_opts::t
int t
Definition: vf_nlmeans_vulkan.c:71
AV_NUM_DATA_POINTERS
#define AV_NUM_DATA_POINTERS
Definition: frame.h:460
DenoisePushData::t
uint32_t t
Definition: vf_nlmeans_vulkan.c:214
FFVulkanShader
Definition: vulkan.h:225
FFVulkanContext::output_format
enum AVPixelFormat output_format
Definition: vulkan.h:362
nlmeans_vulkan_uninit
static void nlmeans_vulkan_uninit(AVFilterContext *avctx)
Definition: vf_nlmeans_vulkan.c:747
ff_nlmeans_denoise_comp_spv_data
const unsigned char ff_nlmeans_denoise_comp_spv_data[]
FFVkExecContext
Definition: vulkan.h:145
ff_vk_shader_update_desc_buffer
int ff_vk_shader_update_desc_buffer(FFVulkanContext *s, FFVkExecContext *e, FFVulkanShader *shd, int set, int bind, int elem, FFVkBuffer *buf, VkDeviceSize offset, VkDeviceSize len, VkFormat fmt)
Update a descriptor in a buffer with a buffer.
Definition: vulkan.c:2786
DenoisePushData::ws_stride
uint32_t ws_stride[4]
Definition: vf_nlmeans_vulkan.c:212
IntegralPushData::xyoffs_start
uint32_t xyoffs_start
Definition: vf_nlmeans_vulkan.c:84
TYPE_SIZE
#define TYPE_SIZE
Definition: vf_nlmeans_vulkan.c:39
nlmeans_vulkan_filter_frame
static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
Definition: vf_nlmeans_vulkan.c:426
uninit
static void uninit(AVBSFContext *ctx)
Definition: pcm_rechunk.c:68
ff_vk_exec_start
int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e)
Start/submit/wait an execution.
Definition: vulkan.c:580
av_malloc
#define av_malloc(s)
Definition: ops_asmgen.c:44
DenoisePushData::nb_components
uint32_t nb_components
Definition: vf_nlmeans_vulkan.c:215
init_filter
static av_cold int init_filter(AVFilterContext *ctx)
Definition: vf_nlmeans_vulkan.c:265
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
ff_vk_unmap_buffer
static int ff_vk_unmap_buffer(FFVulkanContext *s, FFVkBuffer *buf, int flush)
Definition: vulkan.h:610
AVFilterPad::name
const char * name
Pad name.
Definition: filters.h:46
WeightsPushData::ws_count
uint32_t ws_count
Definition: vf_nlmeans_vulkan.c:158
planes
static const struct @586 planes[]
WeightsPushData::patch_size
int32_t patch_size[4]
Definition: vf_nlmeans_vulkan.c:150
NLMeansVulkanContext::nb_offsets
int nb_offsets
Definition: vf_nlmeans_vulkan.c:61
ff_nlmeans_weights_comp_spv_len
const unsigned int ff_nlmeans_weights_comp_spv_len
ff_vk_create_imageviews
int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e, VkImageView views[AV_NUM_DATA_POINTERS], AVFrame *f, enum FFVkShaderRepFormat rep_fmt)
Create an imageview and add it as a dependency to an execution.
Definition: vulkan.c:2002
FFVulkanContext::vkfn
FFVulkanFunctions vkfn
Definition: vulkan.h:316
ff_nlmeans_weights_comp_spv_data
const unsigned char ff_nlmeans_weights_comp_spv_data[]
FFVkExecPool
Definition: vulkan.h:290
ff_vk_shader_add_push_const
int ff_vk_shader_add_push_const(FFVulkanShader *shd, int offset, int size, VkShaderStageFlagBits stage)
Add/update push constants for execution.
Definition: vulkan.c:1509
OFFSET
#define OFFSET(x)
Definition: vf_nlmeans_vulkan.c:769
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: filters.h:264
ff_nlmeans_horizontal_comp_spv_len
const unsigned int ff_nlmeans_horizontal_comp_spv_len
ff_vk_qf_find
AVVulkanDeviceQueueFamily * ff_vk_qf_find(FFVulkanContext *s, VkQueueFlagBits dev_family, VkVideoCodecOperationFlagBitsKHR vid_ops)
Chooses an appropriate QF.
Definition: vulkan.c:286
FFVkExecContext::buf
VkCommandBuffer buf
Definition: vulkan.h:156
NLMeansVulkanContext::nlmeans_opts::s
double s
Definition: vf_nlmeans_vulkan.c:67
NLMeansVulkanContext::nlmeans_opts::pc
int pc[4]
Definition: vf_nlmeans_vulkan.c:70
ff_vk_shader_add_descriptor_set
int ff_vk_shader_add_descriptor_set(FFVulkanContext *s, FFVulkanShader *shd, const FFVulkanDescriptorSetBinding *desc, int nb, int singular, int print_to_shader_only)
Add descriptor to a shader.
Definition: vulkan.c:2538
AV_OPT_TYPE_INT
@ AV_OPT_TYPE_INT
Underlying C type is int.
Definition: opt.h:259
AVFilterContext
An instance of a filter.
Definition: avfilter.h:274
IntegralPushData::nb_components
uint32_t nb_components
Definition: vf_nlmeans_vulkan.c:85
NLMeansVulkanContext::nlmeans_opts::sc
double sc[4]
Definition: vf_nlmeans_vulkan.c:68
desc
const char * desc
Definition: libsvtav1.c:83
ff_vk_filter_config_input
int ff_vk_filter_config_input(AVFilterLink *inlink)
Definition: vulkan_filter.c:176
AVMEDIA_TYPE_VIDEO
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:200
FFFilter::p
AVFilter p
The public AVFilter.
Definition: filters.h:271
mem.h
AVBufferRef
A reference to a data buffer.
Definition: buffer.h:82
init_denoise_pipeline
static av_cold int init_denoise_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec, FFVulkanShader *shd, int planes)
Definition: vf_nlmeans_vulkan.c:218
NLMeansVulkanContext::e
FFVkExecPool e
Definition: vf_nlmeans_vulkan.c:46
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:69
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:35
DenoisePushData::ws_count
uint32_t ws_count
Definition: vf_nlmeans_vulkan.c:213
init_weights_pipeline
static av_cold int init_weights_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec, FFVulkanShader *shd, int planes)
Definition: vf_nlmeans_vulkan.c:162
IntegralPushData::integral_size
uint64_t integral_size
Definition: vf_nlmeans_vulkan.c:82
IntegralPushData::strength
float strength[4]
Definition: vf_nlmeans_vulkan.c:78
TYPE_ELEMS
#define TYPE_ELEMS
Definition: vf_nlmeans_vulkan.c:38
FFVkBuffer
Definition: vulkan.h:125
int32_t
int32_t
Definition: audioconvert.c:56
ff_vk_exec_submit
int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e)
Definition: vulkan.c:925
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
AVVulkanDeviceQueueFamily
Definition: hwcontext_vulkan.h:33
NLMeansVulkanContext::nlmeans_opts::r
int r
Definition: vf_nlmeans_vulkan.c:66
ff_nlmeans_horizontal_comp_spv_data
const unsigned char ff_nlmeans_horizontal_comp_spv_data[]
FFVulkanFunctions
Definition: vulkan_functions.h:275
ff_vk_shader_load
int ff_vk_shader_load(FFVulkanShader *shd, VkPipelineStageFlags stage, VkSpecializationInfo *spec, uint32_t wg_size[3], uint32_t required_subgroup_size)
Initialize a shader object.
Definition: vulkan.c:2128
ff_vk_get_pooled_buffer
int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool, AVBufferRef **buf, VkBufferUsageFlags usage, void *create_pNext, size_t size, VkMemoryPropertyFlagBits mem_props)
Initialize a pool and create AVBufferRefs containing FFVkBuffer.
Definition: vulkan.c:1306
WeightsPushData::comp_plane
uint32_t comp_plane[4]
Definition: vf_nlmeans_vulkan.c:153
NLMeansVulkanContext::vkctx
FFVulkanContext vkctx
Definition: vf_nlmeans_vulkan.c:43
WeightsPushData::width
uint32_t width[4]
Definition: vf_nlmeans_vulkan.c:146