FFmpeg
vf_scale_cuda.c
Go to the documentation of this file.
1 /*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22 
23 #include <float.h>
24 #include <stdio.h>
25 #include <string.h>
26 
27 #include "libavutil/avstring.h"
28 #include "libavutil/common.h"
29 #include "libavutil/hwcontext.h"
31 #include "libavutil/cuda_check.h"
32 #include "libavutil/internal.h"
33 #include "libavutil/opt.h"
34 #include "libavutil/pixdesc.h"
35 
36 #include "avfilter.h"
37 #include "formats.h"
38 #include "internal.h"
39 #include "scale_eval.h"
40 #include "video.h"
41 
42 #include "cuda/load_helper.h"
43 #include "vf_scale_cuda.h"
44 
45 static const enum AVPixelFormat supported_formats[] = {
54 };
55 
/* Integer division of a by b, rounded up (for non-negative a and positive b). */
#define DIV_UP(a, b) (((a) + (b) - 1) / (b))

/* Thread-block dimensions used when launching the resize kernels. */
#define BLOCKX 32
#define BLOCKY 16

/*
 * Evaluate a CUDA driver call through the dynamically loaded function table.
 * Expects an AVFilterContext *ctx (used for logging) and a
 * CUDAScaleContext *s with a valid hwctx to be in scope at the call site.
 */
#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x)
61 
62 enum {
64 
69 
71 };
72 
73 typedef struct CUDAScaleContext {
74  const AVClass *class;
75 
77 
80 
83 
86 
87  /**
88  * Output sw format. AV_PIX_FMT_NONE for no conversion.
89  */
91 
92  char *w_expr; ///< width expression string
93  char *h_expr; ///< height expression string
94 
97 
98  CUcontext cu_ctx;
99  CUmodule cu_module;
100  CUfunction cu_func_uchar;
101  CUfunction cu_func_uchar2;
102  CUfunction cu_func_uchar4;
103  CUfunction cu_func_ushort;
104  CUfunction cu_func_ushort2;
105  CUfunction cu_func_ushort4;
106  CUstream cu_stream;
107 
108  CUdeviceptr srcBuffer;
109  CUdeviceptr dstBuffer;
111 
115 
116  float param;
118 
120 {
121  CUDAScaleContext *s = ctx->priv;
122 
123  s->format = AV_PIX_FMT_NONE;
124  s->frame = av_frame_alloc();
125  if (!s->frame)
126  return AVERROR(ENOMEM);
127 
128  s->tmp_frame = av_frame_alloc();
129  if (!s->tmp_frame)
130  return AVERROR(ENOMEM);
131 
132  return 0;
133 }
134 
136 {
137  CUDAScaleContext *s = ctx->priv;
138 
139  if (s->hwctx && s->cu_module) {
140  CudaFunctions *cu = s->hwctx->internal->cuda_dl;
141  CUcontext dummy;
142 
143  CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx));
144  CHECK_CU(cu->cuModuleUnload(s->cu_module));
145  s->cu_module = NULL;
146  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
147  }
148 
149  av_frame_free(&s->frame);
150  av_buffer_unref(&s->frames_ctx);
151  av_frame_free(&s->tmp_frame);
152 }
153 
155 {
156  static const enum AVPixelFormat pixel_formats[] = {
158  };
159  AVFilterFormats *pix_fmts = ff_make_format_list(pixel_formats);
160  if (!pix_fmts)
161  return AVERROR(ENOMEM);
162 
164 }
165 
166 static av_cold int init_hwframe_ctx(CUDAScaleContext *s, AVBufferRef *device_ctx, int width, int height)
167 {
168  AVBufferRef *out_ref = NULL;
169  AVHWFramesContext *out_ctx;
170  int ret;
171 
172  out_ref = av_hwframe_ctx_alloc(device_ctx);
173  if (!out_ref)
174  return AVERROR(ENOMEM);
175  out_ctx = (AVHWFramesContext*)out_ref->data;
176 
177  out_ctx->format = AV_PIX_FMT_CUDA;
178  out_ctx->sw_format = s->out_fmt;
179  out_ctx->width = FFALIGN(width, 32);
180  out_ctx->height = FFALIGN(height, 32);
181 
182  ret = av_hwframe_ctx_init(out_ref);
183  if (ret < 0)
184  goto fail;
185 
186  av_frame_unref(s->frame);
187  ret = av_hwframe_get_buffer(out_ref, s->frame, 0);
188  if (ret < 0)
189  goto fail;
190 
191  s->frame->width = width;
192  s->frame->height = height;
193 
194  av_buffer_unref(&s->frames_ctx);
195  s->frames_ctx = out_ref;
196 
197  return 0;
198 fail:
199  av_buffer_unref(&out_ref);
200  return ret;
201 }
202 
203 static int format_is_supported(enum AVPixelFormat fmt)
204 {
205  int i;
206 
207  for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
208  if (supported_formats[i] == fmt)
209  return 1;
210  return 0;
211 }
212 
213 static av_cold int init_processing_chain(AVFilterContext *ctx, int in_width, int in_height,
214  int out_width, int out_height)
215 {
216  CUDAScaleContext *s = ctx->priv;
217 
218  AVHWFramesContext *in_frames_ctx;
219 
220  enum AVPixelFormat in_format;
221  enum AVPixelFormat out_format;
222  int ret;
223 
224  /* check that we have a hw context */
225  if (!ctx->inputs[0]->hw_frames_ctx) {
226  av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n");
227  return AVERROR(EINVAL);
228  }
229  in_frames_ctx = (AVHWFramesContext*)ctx->inputs[0]->hw_frames_ctx->data;
230  in_format = in_frames_ctx->sw_format;
231  out_format = (s->format == AV_PIX_FMT_NONE) ? in_format : s->format;
232 
233  if (!format_is_supported(in_format)) {
234  av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n",
235  av_get_pix_fmt_name(in_format));
236  return AVERROR(ENOSYS);
237  }
238  if (!format_is_supported(out_format)) {
239  av_log(ctx, AV_LOG_ERROR, "Unsupported output format: %s\n",
240  av_get_pix_fmt_name(out_format));
241  return AVERROR(ENOSYS);
242  }
243 
244  s->in_fmt = in_format;
245  s->out_fmt = out_format;
246 
247  if (s->passthrough && in_width == out_width && in_height == out_height && in_format == out_format) {
248  s->frames_ctx = av_buffer_ref(ctx->inputs[0]->hw_frames_ctx);
249  if (!s->frames_ctx)
250  return AVERROR(ENOMEM);
251  } else {
252  s->passthrough = 0;
253 
254  ret = init_hwframe_ctx(s, in_frames_ctx->device_ref, out_width, out_height);
255  if (ret < 0)
256  return ret;
257  }
258 
259  ctx->outputs[0]->hw_frames_ctx = av_buffer_ref(s->frames_ctx);
260  if (!ctx->outputs[0]->hw_frames_ctx)
261  return AVERROR(ENOMEM);
262 
263  return 0;
264 }
265 
267 {
268  AVFilterContext *ctx = outlink->src;
269  AVFilterLink *inlink = outlink->src->inputs[0];
270  CUDAScaleContext *s = ctx->priv;
271  AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
272  AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
273  CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
274  CudaFunctions *cu = device_hwctx->internal->cuda_dl;
275  char buf[64];
276  int w, h;
277  int ret;
278 
279  const unsigned char *scaler_ptx;
280  unsigned int scaler_ptx_len;
281  const char *function_infix = "";
282 
283  extern const unsigned char ff_vf_scale_cuda_ptx_data[];
284  extern const unsigned int ff_vf_scale_cuda_ptx_len;
285  extern const unsigned char ff_vf_scale_cuda_bicubic_ptx_data[];
286  extern const unsigned int ff_vf_scale_cuda_bicubic_ptx_len;
287 
288  switch(s->interp_algo) {
289  case INTERP_ALGO_NEAREST:
290  scaler_ptx = ff_vf_scale_cuda_ptx_data;
291  scaler_ptx_len = ff_vf_scale_cuda_ptx_len;
292  function_infix = "_Nearest";
293  s->interp_use_linear = 0;
294  s->interp_as_integer = 1;
295  break;
297  scaler_ptx = ff_vf_scale_cuda_ptx_data;
298  scaler_ptx_len = ff_vf_scale_cuda_ptx_len;
299  function_infix = "_Bilinear";
300  s->interp_use_linear = 1;
301  s->interp_as_integer = 1;
302  break;
303  case INTERP_ALGO_DEFAULT:
304  case INTERP_ALGO_BICUBIC:
305  scaler_ptx = ff_vf_scale_cuda_bicubic_ptx_data;
306  scaler_ptx_len = ff_vf_scale_cuda_bicubic_ptx_len;
307  function_infix = "_Bicubic";
308  s->interp_use_linear = 0;
309  s->interp_as_integer = 0;
310  break;
311  case INTERP_ALGO_LANCZOS:
312  scaler_ptx = ff_vf_scale_cuda_bicubic_ptx_data;
313  scaler_ptx_len = ff_vf_scale_cuda_bicubic_ptx_len;
314  function_infix = "_Lanczos";
315  s->interp_use_linear = 0;
316  s->interp_as_integer = 0;
317  break;
318  default:
319  av_log(ctx, AV_LOG_ERROR, "Unknown interpolation algorithm\n");
320  return AVERROR_BUG;
321  }
322 
323  s->hwctx = device_hwctx;
324  s->cu_stream = s->hwctx->stream;
325 
326  ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
327  if (ret < 0)
328  goto fail;
329 
330  ret = ff_cuda_load_module(ctx, device_hwctx, &s->cu_module, scaler_ptx, scaler_ptx_len);
331  if (ret < 0)
332  goto fail;
333 
334  snprintf(buf, sizeof(buf), "Subsample%s_uchar", function_infix);
335  CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, buf));
336  if (ret < 0)
337  goto fail;
338 
339  snprintf(buf, sizeof(buf), "Subsample%s_uchar2", function_infix);
340  CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, buf));
341  if (ret < 0)
342  goto fail;
343 
344  snprintf(buf, sizeof(buf), "Subsample%s_uchar4", function_infix);
345  CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar4, s->cu_module, buf));
346  if (ret < 0)
347  goto fail;
348 
349  snprintf(buf, sizeof(buf), "Subsample%s_ushort", function_infix);
350  CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, buf));
351  if (ret < 0)
352  goto fail;
353 
354  snprintf(buf, sizeof(buf), "Subsample%s_ushort2", function_infix);
355  CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, buf));
356  if (ret < 0)
357  goto fail;
358 
359  snprintf(buf, sizeof(buf), "Subsample%s_ushort4", function_infix);
360  CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort4, s->cu_module, buf));
361  if (ret < 0)
362  goto fail;
363 
364 
365  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
366 
368  s->w_expr, s->h_expr,
369  inlink, outlink,
370  &w, &h)) < 0)
371  goto fail;
372 
374  s->force_original_aspect_ratio, s->force_divisible_by);
375 
376  if (((int64_t)h * inlink->w) > INT_MAX ||
377  ((int64_t)w * inlink->h) > INT_MAX)
378  av_log(ctx, AV_LOG_ERROR, "Rescaled value for width or height is too big.\n");
379 
380  outlink->w = w;
381  outlink->h = h;
382 
384  if (ret < 0)
385  return ret;
386 
387  av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d -> w:%d h:%d%s\n",
388  inlink->w, inlink->h, outlink->w, outlink->h, s->passthrough ? " (passthrough)" : "");
389 
390  if (inlink->sample_aspect_ratio.num) {
391  outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h*inlink->w,
392  outlink->w*inlink->h},
393  inlink->sample_aspect_ratio);
394  } else {
395  outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
396  }
397 
398  return 0;
399 
400 fail:
401  return ret;
402 }
403 
404 static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, int channels,
405  uint8_t *src_dptr, int src_width, int src_height, int src_pitch,
406  uint8_t *dst_dptr, int dst_width, int dst_height, int dst_pitch,
407  int pixel_size, int bit_depth)
408 {
409  CUDAScaleContext *s = ctx->priv;
410  CudaFunctions *cu = s->hwctx->internal->cuda_dl;
411  CUdeviceptr dst_devptr = (CUdeviceptr)dst_dptr;
412  CUtexObject tex = 0;
413  void *args_uchar[] = { &tex, &dst_devptr, &dst_width, &dst_height, &dst_pitch,
414  &src_width, &src_height, &bit_depth, &s->param };
415  int ret;
416 
417  CUDA_TEXTURE_DESC tex_desc = {
418  .filterMode = s->interp_use_linear ?
419  CU_TR_FILTER_MODE_LINEAR :
420  CU_TR_FILTER_MODE_POINT,
421  .flags = s->interp_as_integer ? CU_TRSF_READ_AS_INTEGER : 0,
422  };
423 
424  CUDA_RESOURCE_DESC res_desc = {
425  .resType = CU_RESOURCE_TYPE_PITCH2D,
426  .res.pitch2D.format = pixel_size == 1 ?
427  CU_AD_FORMAT_UNSIGNED_INT8 :
428  CU_AD_FORMAT_UNSIGNED_INT16,
429  .res.pitch2D.numChannels = channels,
430  .res.pitch2D.width = src_width,
431  .res.pitch2D.height = src_height,
432  .res.pitch2D.pitchInBytes = src_pitch,
433  .res.pitch2D.devPtr = (CUdeviceptr)src_dptr,
434  };
435 
436  // Handling of channels is done via vector-types in cuda, so their size is implicitly part of the pitch
437  // Same for pixel_size, which is represented via datatypes on the cuda side of things.
438  dst_pitch /= channels * pixel_size;
439 
440  ret = CHECK_CU(cu->cuTexObjectCreate(&tex, &res_desc, &tex_desc, NULL));
441  if (ret < 0)
442  goto exit;
443 
444  ret = CHECK_CU(cu->cuLaunchKernel(func,
445  DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1,
446  BLOCKX, BLOCKY, 1, 0, s->cu_stream, args_uchar, NULL));
447 
448 exit:
449  if (tex)
450  CHECK_CU(cu->cuTexObjectDestroy(tex));
451 
452  return ret;
453 }
454 
456  AVFrame *out, AVFrame *in)
457 {
458  AVHWFramesContext *in_frames_ctx = (AVHWFramesContext*)in->hw_frames_ctx->data;
459  CUDAScaleContext *s = ctx->priv;
460 
461  switch (in_frames_ctx->sw_format) {
462  case AV_PIX_FMT_YUV420P:
463  call_resize_kernel(ctx, s->cu_func_uchar, 1,
464  in->data[0], in->width, in->height, in->linesize[0],
465  out->data[0], out->width, out->height, out->linesize[0],
466  1, 8);
467  call_resize_kernel(ctx, s->cu_func_uchar, 1,
468  in->data[1], in->width / 2, in->height / 2, in->linesize[1],
469  out->data[1], out->width / 2, out->height / 2, out->linesize[1],
470  1, 8);
471  call_resize_kernel(ctx, s->cu_func_uchar, 1,
472  in->data[2], in->width / 2, in->height / 2, in->linesize[2],
473  out->data[2], out->width / 2, out->height / 2, out->linesize[2],
474  1, 8);
475  break;
476  case AV_PIX_FMT_YUV444P:
477  call_resize_kernel(ctx, s->cu_func_uchar, 1,
478  in->data[0], in->width, in->height, in->linesize[0],
479  out->data[0], out->width, out->height, out->linesize[0],
480  1, 8);
481  call_resize_kernel(ctx, s->cu_func_uchar, 1,
482  in->data[1], in->width, in->height, in->linesize[1],
483  out->data[1], out->width, out->height, out->linesize[1],
484  1, 8);
485  call_resize_kernel(ctx, s->cu_func_uchar, 1,
486  in->data[2], in->width, in->height, in->linesize[2],
487  out->data[2], out->width, out->height, out->linesize[2],
488  1, 8);
489  break;
491  call_resize_kernel(ctx, s->cu_func_ushort, 1,
492  in->data[0], in->width, in->height, in->linesize[0],
493  out->data[0], out->width, out->height, out->linesize[0],
494  2, 16);
495  call_resize_kernel(ctx, s->cu_func_ushort, 1,
496  in->data[1], in->width, in->height, in->linesize[1],
497  out->data[1], out->width, out->height, out->linesize[1],
498  2, 16);
499  call_resize_kernel(ctx, s->cu_func_ushort, 1,
500  in->data[2], in->width, in->height, in->linesize[2],
501  out->data[2], out->width, out->height, out->linesize[2],
502  2, 16);
503  break;
504  case AV_PIX_FMT_NV12:
505  call_resize_kernel(ctx, s->cu_func_uchar, 1,
506  in->data[0], in->width, in->height, in->linesize[0],
507  out->data[0], out->width, out->height, out->linesize[0],
508  1, 8);
509  call_resize_kernel(ctx, s->cu_func_uchar2, 2,
510  in->data[1], in->width / 2, in->height / 2, in->linesize[1],
511  out->data[1], out->width / 2, out->height / 2, out->linesize[1],
512  1, 8);
513  break;
514  case AV_PIX_FMT_P010LE:
515  call_resize_kernel(ctx, s->cu_func_ushort, 1,
516  in->data[0], in->width, in->height, in->linesize[0],
517  out->data[0], out->width, out->height, out->linesize[0],
518  2, 10);
519  call_resize_kernel(ctx, s->cu_func_ushort2, 2,
520  in->data[1], in->width / 2, in->height / 2, in->linesize[1],
521  out->data[1], out->width / 2, out->height / 2, out->linesize[1],
522  2, 10);
523  break;
524  case AV_PIX_FMT_P016LE:
525  call_resize_kernel(ctx, s->cu_func_ushort, 1,
526  in->data[0], in->width, in->height, in->linesize[0],
527  out->data[0], out->width, out->height, out->linesize[0],
528  2, 16);
529  call_resize_kernel(ctx, s->cu_func_ushort2, 2,
530  in->data[1], in->width / 2, in->height / 2, in->linesize[1],
531  out->data[1], out->width / 2, out->height / 2, out->linesize[1],
532  2, 16);
533  break;
534  case AV_PIX_FMT_0RGB32:
535  case AV_PIX_FMT_0BGR32:
536  call_resize_kernel(ctx, s->cu_func_uchar4, 4,
537  in->data[0], in->width, in->height, in->linesize[0],
538  out->data[0], out->width, out->height, out->linesize[0],
539  1, 8);
540  break;
541  default:
542  return AVERROR_BUG;
543  }
544 
545  return 0;
546 }
547 
549 {
550  CUDAScaleContext *s = ctx->priv;
551  AVFilterLink *outlink = ctx->outputs[0];
552  AVFrame *src = in;
553  int ret;
554 
555  ret = scalecuda_resize(ctx, s->frame, src);
556  if (ret < 0)
557  return ret;
558 
559  src = s->frame;
560  ret = av_hwframe_get_buffer(src->hw_frames_ctx, s->tmp_frame, 0);
561  if (ret < 0)
562  return ret;
563 
564  av_frame_move_ref(out, s->frame);
565  av_frame_move_ref(s->frame, s->tmp_frame);
566 
567  s->frame->width = outlink->w;
568  s->frame->height = outlink->h;
569 
570  ret = av_frame_copy_props(out, in);
571  if (ret < 0)
572  return ret;
573 
574  return 0;
575 }
576 
578 {
579  AVFilterContext *ctx = link->dst;
580  CUDAScaleContext *s = ctx->priv;
581  AVFilterLink *outlink = ctx->outputs[0];
582  CudaFunctions *cu = s->hwctx->internal->cuda_dl;
583 
584  AVFrame *out = NULL;
585  CUcontext dummy;
586  int ret = 0;
587 
588  if (s->passthrough)
589  return ff_filter_frame(outlink, in);
590 
591  out = av_frame_alloc();
592  if (!out) {
593  ret = AVERROR(ENOMEM);
594  goto fail;
595  }
596 
597  ret = CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx));
598  if (ret < 0)
599  goto fail;
600 
601  ret = cudascale_scale(ctx, out, in);
602 
603  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
604  if (ret < 0)
605  goto fail;
606 
607  av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
608  (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
609  (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
610  INT_MAX);
611 
612  av_frame_free(&in);
613  return ff_filter_frame(outlink, out);
614 fail:
615  av_frame_free(&in);
616  av_frame_free(&out);
617  return ret;
618 }
619 
621 {
622  CUDAScaleContext *s = inlink->dst->priv;
623 
624  return s->passthrough ?
627 }
628 
629 #define OFFSET(x) offsetof(CUDAScaleContext, x)
630 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
631 static const AVOption options[] = {
632  { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS },
633  { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS },
634  { "interp_algo", "Interpolation algorithm used for resizing", OFFSET(interp_algo), AV_OPT_TYPE_INT, { .i64 = INTERP_ALGO_DEFAULT }, 0, INTERP_ALGO_COUNT - 1, FLAGS, "interp_algo" },
635  { "nearest", "nearest neighbour", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_NEAREST }, 0, 0, FLAGS, "interp_algo" },
636  { "bilinear", "bilinear", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BILINEAR }, 0, 0, FLAGS, "interp_algo" },
637  { "bicubic", "bicubic", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BICUBIC }, 0, 0, FLAGS, "interp_algo" },
638  { "lanczos", "lanczos", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_LANCZOS }, 0, 0, FLAGS, "interp_algo" },
639  { "passthrough", "Do not process frames at all if parameters match", OFFSET(passthrough), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS },
640  { "param", "Algorithm-Specific parameter", OFFSET(param), AV_OPT_TYPE_FLOAT, { .dbl = SCALE_CUDA_PARAM_DEFAULT }, -FLT_MAX, FLT_MAX, FLAGS },
641  { "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 2, FLAGS, "force_oar" },
642  { "disable", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, 0, 0, FLAGS, "force_oar" },
643  { "decrease", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, 0, 0, FLAGS, "force_oar" },
644  { "increase", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2 }, 0, 0, FLAGS, "force_oar" },
645  { "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 256, FLAGS },
646  { NULL },
647 };
648 
649 static const AVClass cudascale_class = {
650  .class_name = "cudascale",
651  .item_name = av_default_item_name,
652  .option = options,
653  .version = LIBAVUTIL_VERSION_INT,
654 };
655 
656 static const AVFilterPad cudascale_inputs[] = {
657  {
658  .name = "default",
659  .type = AVMEDIA_TYPE_VIDEO,
660  .filter_frame = cudascale_filter_frame,
661  .get_video_buffer = cudascale_get_video_buffer,
662  },
663  { NULL }
664 };
665 
666 static const AVFilterPad cudascale_outputs[] = {
667  {
668  .name = "default",
669  .type = AVMEDIA_TYPE_VIDEO,
670  .config_props = cudascale_config_props,
671  },
672  { NULL }
673 };
674 
676  .name = "scale_cuda",
677  .description = NULL_IF_CONFIG_SMALL("GPU accelerated video resizer"),
678 
679  .init = cudascale_init,
680  .uninit = cudascale_uninit,
681  .query_formats = cudascale_query_formats,
682 
683  .priv_size = sizeof(CUDAScaleContext),
684  .priv_class = &cudascale_class,
685 
688 
689  .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
690 };
call_resize_kernel
static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, int channels, uint8_t *src_dptr, int src_width, int src_height, int src_pitch, uint8_t *dst_dptr, int dst_width, int dst_height, int dst_pitch, int pixel_size, int bit_depth)
Definition: vf_scale_cuda.c:404
options
static const AVOption options[]
Definition: vf_scale_cuda.c:631
func
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:68
AVHWDeviceContext::hwctx
void * hwctx
The format-specific data, allocated and freed by libavutil along with this context.
Definition: hwcontext.h:92
bit_depth
static void bit_depth(AudioStatsContext *s, uint64_t mask, uint64_t imask, AVRational *depth)
Definition: af_astats.c:254
AV_PIX_FMT_CUDA
@ AV_PIX_FMT_CUDA
HW acceleration through CUDA.
Definition: pixfmt.h:225
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
INTERP_ALGO_BICUBIC
@ INTERP_ALGO_BICUBIC
Definition: vf_scale_cuda.c:67
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
ff_make_format_list
AVFilterFormats * ff_make_format_list(const int *fmts)
Create a list of supported formats.
Definition: formats.c:286
CUDAScaleContext::frames_ctx
AVBufferRef * frames_ctx
Definition: vf_scale_cuda.c:81
hwcontext_cuda_internal.h
cudascale_init
static av_cold int cudascale_init(AVFilterContext *ctx)
Definition: vf_scale_cuda.c:119
out
FILE * out
Definition: movenc.c:54
FF_FILTER_FLAG_HWFRAME_AWARE
#define FF_FILTER_FLAG_HWFRAME_AWARE
The filter is aware of hardware frames, and any hardware frame context should not be automatically pr...
Definition: internal.h:339
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:978
AVBufferRef::data
uint8_t * data
The data buffer.
Definition: buffer.h:92
AVHWFramesContext::format
enum AVPixelFormat format
The pixel format identifying the underlying HW surface type.
Definition: hwcontext.h:209
ff_cuda_load_module
int ff_cuda_load_module(void *avctx, AVCUDADeviceContext *hwctx, CUmodule *cu_module, const unsigned char *data, const unsigned int length)
Loads a CUDA module and applies any decompression, if neccesary.
Definition: load_helper.c:34
INTERP_ALGO_NEAREST
@ INTERP_ALGO_NEAREST
Definition: vf_scale_cuda.c:65
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
CUDAScaleContext::tmp_frame
AVFrame * tmp_frame
Definition: vf_scale_cuda.c:84
CUDAScaleContext::passthrough
int passthrough
Definition: vf_scale_cuda.c:85
cudascale_uninit
static av_cold void cudascale_uninit(AVFilterContext *ctx)
Definition: vf_scale_cuda.c:135
CUDAScaleContext::srcBuffer
CUdeviceptr srcBuffer
Definition: vf_scale_cuda.c:108
av_frame_free
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:111
av_hwframe_ctx_init
int av_hwframe_ctx_init(AVBufferRef *ref)
Finalize the context before use.
Definition: hwcontext.c:333
CUDAScaleContext::w_expr
char * w_expr
width expression string
Definition: vf_scale_cuda.c:92
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:303
pixdesc.h
AVFrame::width
int width
Definition: frame.h:361
w
uint8_t w
Definition: llviddspenc.c:39
av_hwframe_ctx_alloc
AVBufferRef * av_hwframe_ctx_alloc(AVBufferRef *device_ref_in)
Allocate an AVHWFramesContext tied to a given device context.
Definition: hwcontext.c:247
AVOption
AVOption.
Definition: opt.h:248
init_hwframe_ctx
static av_cold int init_hwframe_ctx(CUDAScaleContext *s, AVBufferRef *device_ctx, int width, int height)
Definition: vf_scale_cuda.c:166
CUDAScaleContext::interp_use_linear
int interp_use_linear
Definition: vf_scale_cuda.c:113
FLAGS
#define FLAGS
Definition: vf_scale_cuda.c:630
AV_LOG_VERBOSE
#define AV_LOG_VERBOSE
Detailed information.
Definition: log.h:197
ff_scale_eval_dimensions
int ff_scale_eval_dimensions(void *log_ctx, const char *w_expr, const char *h_expr, AVFilterLink *inlink, AVFilterLink *outlink, int *ret_w, int *ret_h)
Parse and evaluate string expressions for width and height.
Definition: scale_eval.c:57
float.h
DIV_UP
#define DIV_UP(a, b)
Definition: vf_scale_cuda.c:56
CUDAScaleContext::cu_func_ushort4
CUfunction cu_func_ushort4
Definition: vf_scale_cuda.c:105
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:149
AVHWFramesContext::width
int width
The allocated dimensions of the frames in this pool.
Definition: hwcontext.h:229
video.h
AVFrame::data
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:317
CUDAScaleContext::frame
AVFrame * frame
Definition: vf_scale_cuda.c:82
AVFilterFormats
A list of supported formats for one end of a filter link.
Definition: formats.h:65
formats.h
INTERP_ALGO_COUNT
@ INTERP_ALGO_COUNT
Definition: vf_scale_cuda.c:70
ff_default_get_video_buffer
AVFrame * ff_default_get_video_buffer(AVFilterLink *link, int w, int h)
Definition: video.c:44
vf_scale_cuda.h
fail
#define fail()
Definition: checkasm.h:134
INTERP_ALGO_LANCZOS
@ INTERP_ALGO_LANCZOS
Definition: vf_scale_cuda.c:68
CHECK_CU
#define CHECK_CU(x)
Definition: vf_scale_cuda.c:60
scalecuda_resize
static int scalecuda_resize(AVFilterContext *ctx, AVFrame *out, AVFrame *in)
Definition: vf_scale_cuda.c:455
av_reduce
int av_reduce(int *dst_num, int *dst_den, int64_t num, int64_t den, int64_t max)
Reduce a fraction.
Definition: rational.c:35
AVRational::num
int num
Numerator.
Definition: rational.h:59
AVFilterPad
A filter pad used for either input or output.
Definition: internal.h:54
av_frame_alloc
AVFrame * av_frame_alloc(void)
Allocate an AVFrame and set its fields to default values.
Definition: frame.c:98
cudascale_class
static const AVClass cudascale_class
Definition: vf_scale_cuda.c:649
cudascale_config_props
static av_cold int cudascale_config_props(AVFilterLink *outlink)
Definition: vf_scale_cuda.c:266
CUDAScaleContext::hwctx
AVCUDADeviceContext * hwctx
Definition: vf_scale_cuda.c:76
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:181
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
av_cold
#define av_cold
Definition: attributes.h:90
inputs
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several inputs
Definition: filter_design.txt:243
ff_set_common_formats
int ff_set_common_formats(AVFilterContext *ctx, AVFilterFormats *formats)
A helper for query_formats() which sets all links to the same list of formats.
Definition: formats.c:580
AVHWFramesContext::height
int height
Definition: hwcontext.h:229
CUDAScaleContext::interp_as_integer
int interp_as_integer
Definition: vf_scale_cuda.c:114
width
#define width
CUDAScaleContext::cu_func_ushort
CUfunction cu_func_ushort
Definition: vf_scale_cuda.c:103
s
#define s(width, name)
Definition: cbs_vp9.c:257
AV_PIX_FMT_YUV444P16
#define AV_PIX_FMT_YUV444P16
Definition: pixfmt.h:402
AV_PIX_FMT_0BGR32
#define AV_PIX_FMT_0BGR32
Definition: pixfmt.h:367
CUDAScaleContext::cu_stream
CUstream cu_stream
Definition: vf_scale_cuda.c:106
outputs
static const AVFilterPad outputs[]
Definition: af_acontrast.c:203
pix_fmts
static enum AVPixelFormat pix_fmts[]
Definition: libkvazaar.c:290
ctx
AVFormatContext * ctx
Definition: movenc.c:48
channels
channels
Definition: aptx.h:33
load_helper.h
AV_PIX_FMT_YUV420P
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:66
format_is_supported
static int format_is_supported(enum AVPixelFormat fmt)
Definition: vf_scale_cuda.c:203
link
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a link
Definition: filter_design.txt:23
CUDAScaleContext::param
float param
Definition: vf_scale_cuda.c:116
CUDAScaleContext::force_divisible_by
int force_divisible_by
Definition: vf_scale_cuda.c:96
LIBAVUTIL_VERSION_INT
#define LIBAVUTIL_VERSION_INT
Definition: version.h:85
CUDAScaleContext::interp_algo
int interp_algo
Definition: vf_scale_cuda.c:112
AVClass
Describe the class of an AVClass context structure.
Definition: log.h:67
OFFSET
#define OFFSET(x)
Definition: vf_scale_cuda.c:629
NULL
#define NULL
Definition: coverity.c:32
AVHWFramesContext::sw_format
enum AVPixelFormat sw_format
The pixel format identifying the actual data layout of the hardware frames.
Definition: hwcontext.h:222
av_frame_copy_props
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:536
av_buffer_unref
void av_buffer_unref(AVBufferRef **buf)
Free a given reference and automatically free the buffer if there are no more references to it.
Definition: buffer.c:125
CUDAScaleContext::h_expr
char * h_expr
height expression string
Definition: vf_scale_cuda.c:93
BLOCKY
#define BLOCKY
Definition: vf_scale_cuda.c:58
AVRational
Rational number (pair of numerator and denominator).
Definition: rational.h:58
AVHWFramesContext::device_ref
AVBufferRef * device_ref
A reference to the parent AVHWDeviceContext.
Definition: hwcontext.h:141
AVFilterContext::inputs
AVFilterLink ** inputs
array of pointers to input links
Definition: avfilter.h:341
av_default_item_name
const char * av_default_item_name(void *ptr)
Return the context name.
Definition: log.c:235
src
#define src
Definition: vp8dsp.c:255
cudascale_inputs
static const AVFilterPad cudascale_inputs[]
Definition: vf_scale_cuda.c:656
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:117
CUDAScaleContext::cu_func_uchar2
CUfunction cu_func_uchar2
Definition: vf_scale_cuda.c:101
cudascale_query_formats
static int cudascale_query_formats(AVFilterContext *ctx)
Definition: vf_scale_cuda.c:154
scale_eval.h
height
#define height
CUDAScaleContext::cu_func_ushort2
CUfunction cu_func_ushort2
Definition: vf_scale_cuda.c:104
CUDAScaleContext::cu_module
CUmodule cu_module
Definition: vf_scale_cuda.c:99
ff_null_get_video_buffer
AVFrame * ff_null_get_video_buffer(AVFilterLink *link, int w, int h)
Definition: video.c:39
internal.h
CUDAScaleContext
Definition: vf_scale_cuda.c:73
AV_OPT_TYPE_FLOAT
@ AV_OPT_TYPE_FLOAT
Definition: opt.h:228
supported_formats
static const enum AVPixelFormat supported_formats[]
Definition: vf_scale_cuda.c:45
i
int i
Definition: input.c:407
internal.h
common.h
av_frame_move_ref
void av_frame_move_ref(AVFrame *dst, AVFrame *src)
Move everything contained in src to dst and reset src.
Definition: frame.c:460
av_frame_unref
void av_frame_unref(AVFrame *frame)
Unreference all the buffers referenced by frame and reset the frame fields.
Definition: frame.c:436
AVFilterPad::name
const char * name
Pad name.
Definition: internal.h:60
AV_PIX_FMT_P016
#define AV_PIX_FMT_P016
Definition: pixfmt.h:439
CUDAScaleContext::dstBuffer
CUdeviceptr dstBuffer
Definition: vf_scale_cuda.c:109
AVFilter
Filter definition.
Definition: avfilter.h:145
AVHWFramesContext
This struct describes a set or pool of "hardware" frames (i.e. those with data not located in normal system memory).
Definition: hwcontext.h:124
AVCUDADeviceContext
This struct is allocated as AVHWDeviceContext.hwctx.
Definition: hwcontext_cuda.h:42
BLOCKX
#define BLOCKX
Definition: vf_scale_cuda.c:57
ret
ret
Definition: filter_design.txt:187
AV_PIX_FMT_NV12
@ AV_PIX_FMT_NV12
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V)
Definition: pixfmt.h:89
AVClass::class_name
const char * class_name
The name of the class; usually it is the same name as the context structure type to which the AVClass is associated.
Definition: log.h:72
AV_PIX_FMT_0RGB32
#define AV_PIX_FMT_0RGB32
Definition: pixfmt.h:366
AVHWFramesContext::device_ctx
AVHWDeviceContext * device_ctx
The parent AVHWDeviceContext.
Definition: hwcontext.h:149
cuda_check.h
AV_PIX_FMT_P016LE
@ AV_PIX_FMT_P016LE
like NV12, with 16bpp per component, little-endian
Definition: pixfmt.h:290
AVFrame::sample_aspect_ratio
AVRational sample_aspect_ratio
Sample aspect ratio for the video frame, 0/1 if unknown/unspecified.
Definition: frame.h:391
ff_scale_adjust_dimensions
int ff_scale_adjust_dimensions(AVFilterLink *inlink, int *ret_w, int *ret_h, int force_original_aspect_ratio, int force_divisible_by)
Transform evaluated width and height obtained from ff_scale_eval_dimensions into actual target width and height for scaling.
Definition: scale_eval.c:113
AVFrame::hw_frames_ctx
AVBufferRef * hw_frames_ctx
For hwaccel-format frames, this should be a reference to the AVHWFramesContext describing the frame.
Definition: frame.h:607
AVFrame::height
int height
Definition: frame.h:361
cudascale_get_video_buffer
static AVFrame * cudascale_get_video_buffer(AVFilterLink *inlink, int w, int h)
Definition: vf_scale_cuda.c:620
AVRational::den
int den
Denominator.
Definition: rational.h:60
AV_PIX_FMT_NONE
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:65
dummy
int dummy
Definition: motion.c:64
AV_OPT_TYPE_INT
@ AV_OPT_TYPE_INT
Definition: opt.h:225
avfilter.h
INTERP_ALGO_DEFAULT
@ INTERP_ALGO_DEFAULT
Definition: vf_scale_cuda.c:63
CUDAScaleContext::cu_func_uchar4
CUfunction cu_func_uchar4
Definition: vf_scale_cuda.c:102
av_mul_q
AVRational av_mul_q(AVRational b, AVRational c)
Multiply two rationals.
Definition: rational.c:80
av_buffer_ref
AVBufferRef * av_buffer_ref(AVBufferRef *buf)
Create a new reference to an AVBuffer.
Definition: buffer.c:93
AV_PIX_FMT_YUV444P
@ AV_PIX_FMT_YUV444P
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:71
AVFilterContext
An instance of a filter.
Definition: avfilter.h:333
AV_PIX_FMT_P010
#define AV_PIX_FMT_P010
Definition: pixfmt.h:438
cudascale_outputs
static const AVFilterPad cudascale_outputs[]
Definition: vf_scale_cuda.c:666
AVMEDIA_TYPE_VIDEO
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:201
AVBufferRef
A reference to a data buffer.
Definition: buffer.h:84
AV_PIX_FMT_P010LE
@ AV_PIX_FMT_P010LE
like NV12, with 10bpp per component, data in the high bits, zeros in the low bits, little-endian
Definition: pixfmt.h:274
cudascale_scale
static int cudascale_scale(AVFilterContext *ctx, AVFrame *out, AVFrame *in)
Definition: vf_scale_cuda.c:548
cudascale_filter_frame
static int cudascale_filter_frame(AVFilterLink *link, AVFrame *in)
Definition: vf_scale_cuda.c:577
CUDAScaleContext::force_original_aspect_ratio
int force_original_aspect_ratio
Definition: vf_scale_cuda.c:95
CUDAScaleContext::format
enum AVPixelFormat format
Output sw format.
Definition: vf_scale_cuda.c:90
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:48
AV_OPT_TYPE_BOOL
@ AV_OPT_TYPE_BOOL
Definition: opt.h:242
CUDAScaleContext::cu_func_uchar
CUfunction cu_func_uchar
Definition: vf_scale_cuda.c:100
CUDAScaleContext::in_fmt
enum AVPixelFormat in_fmt
Definition: vf_scale_cuda.c:78
SCALE_CUDA_PARAM_DEFAULT
#define SCALE_CUDA_PARAM_DEFAULT
Definition: vf_scale_cuda.h:26
init_processing_chain
static av_cold int init_processing_chain(AVFilterContext *ctx, int in_width, int in_height, int out_width, int out_height)
Definition: vf_scale_cuda.c:213
hwcontext.h
AVERROR_BUG
#define AVERROR_BUG
Internal bug, also see AVERROR_BUG2.
Definition: error.h:50
AVFrame::linesize
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
Definition: frame.h:334
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:28
CUDAScaleContext::out_fmt
enum AVPixelFormat out_fmt
Definition: vf_scale_cuda.c:79
h
h
Definition: vp9dsp_template.c:2038
avstring.h
AV_OPT_TYPE_STRING
@ AV_OPT_TYPE_STRING
Definition: opt.h:229
av_hwframe_get_buffer
int av_hwframe_get_buffer(AVBufferRef *hwframe_ref, AVFrame *frame, int flags)
Allocate a new frame attached to the given AVHWFramesContext.
Definition: hwcontext.c:502
AV_OPT_TYPE_CONST
@ AV_OPT_TYPE_CONST
Definition: opt.h:234
snprintf
#define snprintf
Definition: snprintf.h:34
CUDAScaleContext::tex_alignment
int tex_alignment
Definition: vf_scale_cuda.c:110
INTERP_ALGO_BILINEAR
@ INTERP_ALGO_BILINEAR
Definition: vf_scale_cuda.c:66
ff_vf_scale_cuda
const AVFilter ff_vf_scale_cuda
Definition: vf_scale_cuda.c:675
av_get_pix_fmt_name
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
Definition: pixdesc.c:2461
CUDAScaleContext::cu_ctx
CUcontext cu_ctx
Definition: vf_scale_cuda.c:98