FFmpeg
vf_scale_cuda.c
Go to the documentation of this file.
1 /*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22 
23 #include <float.h>
24 #include <stdio.h>
25 #include <string.h>
26 
27 #include "libavutil/avstring.h"
28 #include "libavutil/common.h"
29 #include "libavutil/hwcontext.h"
31 #include "libavutil/cuda_check.h"
32 #include "libavutil/internal.h"
33 #include "libavutil/opt.h"
34 #include "libavutil/pixdesc.h"
35 
36 #include "avfilter.h"
37 #include "formats.h"
38 #include "internal.h"
39 #include "scale_eval.h"
40 #include "video.h"
41 
42 #include "cuda/load_helper.h"
43 #include "vf_scale_cuda.h"
44 
/* Software pixel formats the scaler accepts; format_is_supported() searches this table. */
static const enum AVPixelFormat supported_formats[] = {
};
55 
/* Integer division of a by b rounded up (for positive operands). */
#define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
/* Thread-block dimensions passed to cuLaunchKernel by call_resize_kernel(). */
#define BLOCKX 32
#define BLOCKY 16

/* Wraps a CUDA driver call in FF_CUDA_CHECK_DL; needs `ctx` and `s` in the calling scope. */
#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x)
61 
/*
 * Values of the "interp_algo" option. INTERP_ALGO_DEFAULT is the option's
 * default value and INTERP_ALGO_COUNT - 1 is its upper bound.
 */
enum {
    INTERP_ALGO_DEFAULT,

    INTERP_ALGO_NEAREST,
    INTERP_ALGO_BILINEAR,
    INTERP_ALGO_BICUBIC,
    INTERP_ALGO_LANCZOS,

    INTERP_ALGO_COUNT
};
72 
73 typedef struct CUDAScaleContext {
74  const AVClass *class;
75 
77 
78  enum AVPixelFormat in_fmt, out_fmt;
83 
86 
89 
90  /**
91  * Output sw format. AV_PIX_FMT_NONE for no conversion.
92  */
94 
95  char *w_expr; ///< width expression string
96  char *h_expr; ///< height expression string
97 
100 
101  CUcontext cu_ctx;
102  CUmodule cu_module;
103  CUfunction cu_func;
104  CUfunction cu_func_uv;
105  CUstream cu_stream;
106 
110 
111  float param;
113 
115 {
116  CUDAScaleContext *s = ctx->priv;
117 
118  s->frame = av_frame_alloc();
119  if (!s->frame)
120  return AVERROR(ENOMEM);
121 
122  s->tmp_frame = av_frame_alloc();
123  if (!s->tmp_frame)
124  return AVERROR(ENOMEM);
125 
126  return 0;
127 }
128 
130 {
131  CUDAScaleContext *s = ctx->priv;
132 
133  if (s->hwctx && s->cu_module) {
134  CudaFunctions *cu = s->hwctx->internal->cuda_dl;
135  CUcontext dummy;
136 
137  CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx));
138  CHECK_CU(cu->cuModuleUnload(s->cu_module));
139  s->cu_module = NULL;
140  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
141  }
142 
143  av_frame_free(&s->frame);
144  av_buffer_unref(&s->frames_ctx);
145  av_frame_free(&s->tmp_frame);
146 }
147 
149 {
150  static const enum AVPixelFormat pixel_formats[] = {
152  };
153 
154  return ff_set_common_formats_from_list(ctx, pixel_formats);
155 }
156 
157 static av_cold int init_hwframe_ctx(CUDAScaleContext *s, AVBufferRef *device_ctx, int width, int height)
158 {
159  AVBufferRef *out_ref = NULL;
160  AVHWFramesContext *out_ctx;
161  int ret;
162 
163  out_ref = av_hwframe_ctx_alloc(device_ctx);
164  if (!out_ref)
165  return AVERROR(ENOMEM);
166  out_ctx = (AVHWFramesContext*)out_ref->data;
167 
168  out_ctx->format = AV_PIX_FMT_CUDA;
169  out_ctx->sw_format = s->out_fmt;
170  out_ctx->width = FFALIGN(width, 32);
171  out_ctx->height = FFALIGN(height, 32);
172 
173  ret = av_hwframe_ctx_init(out_ref);
174  if (ret < 0)
175  goto fail;
176 
177  av_frame_unref(s->frame);
178  ret = av_hwframe_get_buffer(out_ref, s->frame, 0);
179  if (ret < 0)
180  goto fail;
181 
182  s->frame->width = width;
183  s->frame->height = height;
184 
185  av_buffer_unref(&s->frames_ctx);
186  s->frames_ctx = out_ref;
187 
188  return 0;
189 fail:
190  av_buffer_unref(&out_ref);
191  return ret;
192 }
193 
194 static int format_is_supported(enum AVPixelFormat fmt)
195 {
196  int i;
197 
198  for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
199  if (supported_formats[i] == fmt)
200  return 1;
201  return 0;
202 }
203 
204 static av_cold void set_format_info(AVFilterContext *ctx, enum AVPixelFormat in_format, enum AVPixelFormat out_format)
205 {
206  CUDAScaleContext *s = ctx->priv;
207  int i, p, d;
208 
209  s->in_fmt = in_format;
210  s->out_fmt = out_format;
211 
212  s->in_desc = av_pix_fmt_desc_get(s->in_fmt);
213  s->out_desc = av_pix_fmt_desc_get(s->out_fmt);
214  s->in_planes = av_pix_fmt_count_planes(s->in_fmt);
215  s->out_planes = av_pix_fmt_count_planes(s->out_fmt);
216 
217  // find maximum step of each component of each plane
218  // For our subset of formats, this should accurately tell us how many channels CUDA needs
219  // i.e. 1 for Y plane, 2 for UV plane of NV12, 4 for single plane of RGB0 formats
220 
221  for (i = 0; i < s->in_desc->nb_components; i++) {
222  d = (s->in_desc->comp[i].depth + 7) / 8;
223  p = s->in_desc->comp[i].plane;
224  s->in_plane_channels[p] = FFMAX(s->in_plane_channels[p], s->in_desc->comp[i].step / d);
225 
226  s->in_plane_depths[p] = s->in_desc->comp[i].depth;
227  }
228 }
229 
230 static av_cold int init_processing_chain(AVFilterContext *ctx, int in_width, int in_height,
231  int out_width, int out_height)
232 {
233  CUDAScaleContext *s = ctx->priv;
234 
235  AVHWFramesContext *in_frames_ctx;
236 
237  enum AVPixelFormat in_format;
238  enum AVPixelFormat out_format;
239  int ret;
240 
241  /* check that we have a hw context */
242  if (!ctx->inputs[0]->hw_frames_ctx) {
243  av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n");
244  return AVERROR(EINVAL);
245  }
246  in_frames_ctx = (AVHWFramesContext*)ctx->inputs[0]->hw_frames_ctx->data;
247  in_format = in_frames_ctx->sw_format;
248  out_format = (s->format == AV_PIX_FMT_NONE) ? in_format : s->format;
249 
250  if (!format_is_supported(in_format)) {
251  av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n",
252  av_get_pix_fmt_name(in_format));
253  return AVERROR(ENOSYS);
254  }
255  if (!format_is_supported(out_format)) {
256  av_log(ctx, AV_LOG_ERROR, "Unsupported output format: %s\n",
257  av_get_pix_fmt_name(out_format));
258  return AVERROR(ENOSYS);
259  }
260 
261  set_format_info(ctx, in_format, out_format);
262 
263  if (s->passthrough && in_width == out_width && in_height == out_height && in_format == out_format) {
264  s->frames_ctx = av_buffer_ref(ctx->inputs[0]->hw_frames_ctx);
265  if (!s->frames_ctx)
266  return AVERROR(ENOMEM);
267  } else {
268  s->passthrough = 0;
269 
270  ret = init_hwframe_ctx(s, in_frames_ctx->device_ref, out_width, out_height);
271  if (ret < 0)
272  return ret;
273 
274  if (in_width == out_width && in_height == out_height &&
275  in_format == out_format && s->interp_algo == INTERP_ALGO_DEFAULT)
276  s->interp_algo = INTERP_ALGO_NEAREST;
277  }
278 
279  ctx->outputs[0]->hw_frames_ctx = av_buffer_ref(s->frames_ctx);
280  if (!ctx->outputs[0]->hw_frames_ctx)
281  return AVERROR(ENOMEM);
282 
283  return 0;
284 }
285 
287 {
288  CUDAScaleContext *s = ctx->priv;
289  CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx;
290  CudaFunctions *cu = s->hwctx->internal->cuda_dl;
291  char buf[128];
292  int ret;
293 
294  const char *in_fmt_name = av_get_pix_fmt_name(s->in_fmt);
295  const char *out_fmt_name = av_get_pix_fmt_name(s->out_fmt);
296 
297  const char *function_infix = "";
298 
299  extern const unsigned char ff_vf_scale_cuda_ptx_data[];
300  extern const unsigned int ff_vf_scale_cuda_ptx_len;
301 
302  switch(s->interp_algo) {
303  case INTERP_ALGO_NEAREST:
304  function_infix = "Nearest";
305  s->interp_use_linear = 0;
306  s->interp_as_integer = 1;
307  break;
309  function_infix = "Bilinear";
310  s->interp_use_linear = 1;
311  s->interp_as_integer = 1;
312  break;
313  case INTERP_ALGO_DEFAULT:
314  case INTERP_ALGO_BICUBIC:
315  function_infix = "Bicubic";
316  s->interp_use_linear = 0;
317  s->interp_as_integer = 0;
318  break;
319  case INTERP_ALGO_LANCZOS:
320  function_infix = "Lanczos";
321  s->interp_use_linear = 0;
322  s->interp_as_integer = 0;
323  break;
324  default:
325  av_log(ctx, AV_LOG_ERROR, "Unknown interpolation algorithm\n");
326  return AVERROR_BUG;
327  }
328 
329  ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
330  if (ret < 0)
331  return ret;
332 
333  ret = ff_cuda_load_module(ctx, s->hwctx, &s->cu_module,
334  ff_vf_scale_cuda_ptx_data, ff_vf_scale_cuda_ptx_len);
335  if (ret < 0)
336  goto fail;
337 
338  snprintf(buf, sizeof(buf), "Subsample_%s_%s_%s", function_infix, in_fmt_name, out_fmt_name);
339  ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func, s->cu_module, buf));
340  if (ret < 0) {
341  av_log(ctx, AV_LOG_FATAL, "Unsupported conversion: %s -> %s\n", in_fmt_name, out_fmt_name);
342  ret = AVERROR(ENOSYS);
343  goto fail;
344  }
345 
346  snprintf(buf, sizeof(buf), "Subsample_%s_%s_%s_uv", function_infix, in_fmt_name, out_fmt_name);
347  ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uv, s->cu_module, buf));
348  if (ret < 0)
349  goto fail;
350 
351 fail:
352  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
353 
354  return ret;
355 }
356 
358 {
359  AVFilterContext *ctx = outlink->src;
360  AVFilterLink *inlink = outlink->src->inputs[0];
361  CUDAScaleContext *s = ctx->priv;
362  AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
363  AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
364  int w, h;
365  int ret;
366 
367  s->hwctx = device_hwctx;
368  s->cu_stream = s->hwctx->stream;
369 
371  s->w_expr, s->h_expr,
372  inlink, outlink,
373  &w, &h)) < 0)
374  goto fail;
375 
377  s->force_original_aspect_ratio, s->force_divisible_by);
378 
379  if (((int64_t)h * inlink->w) > INT_MAX ||
380  ((int64_t)w * inlink->h) > INT_MAX)
381  av_log(ctx, AV_LOG_ERROR, "Rescaled value for width or height is too big.\n");
382 
383  outlink->w = w;
384  outlink->h = h;
385 
387  if (ret < 0)
388  return ret;
389 
390  if (inlink->sample_aspect_ratio.num) {
391  outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h*inlink->w,
392  outlink->w*inlink->h},
393  inlink->sample_aspect_ratio);
394  } else {
395  outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
396  }
397 
398  av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d fmt:%s -> w:%d h:%d fmt:%s%s\n",
399  inlink->w, inlink->h, av_get_pix_fmt_name(s->in_fmt),
400  outlink->w, outlink->h, av_get_pix_fmt_name(s->out_fmt),
401  s->passthrough ? " (passthrough)" : "");
402 
404  if (ret < 0)
405  return ret;
406 
407  return 0;
408 
409 fail:
410  return ret;
411 }
412 
413 static int call_resize_kernel(AVFilterContext *ctx, CUfunction func,
414  CUtexObject src_tex[4], int src_width, int src_height,
415  AVFrame *out_frame, int dst_width, int dst_height, int dst_pitch)
416 {
417  CUDAScaleContext *s = ctx->priv;
418  CudaFunctions *cu = s->hwctx->internal->cuda_dl;
419 
420  CUdeviceptr dst_devptr[4] = {
421  (CUdeviceptr)out_frame->data[0], (CUdeviceptr)out_frame->data[1],
422  (CUdeviceptr)out_frame->data[2], (CUdeviceptr)out_frame->data[3]
423  };
424 
425  void *args_uchar[] = {
426  &src_tex[0], &src_tex[1], &src_tex[2], &src_tex[3],
427  &dst_devptr[0], &dst_devptr[1], &dst_devptr[2], &dst_devptr[3],
428  &dst_width, &dst_height, &dst_pitch,
429  &src_width, &src_height, &s->param
430  };
431 
432  return CHECK_CU(cu->cuLaunchKernel(func,
433  DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1,
434  BLOCKX, BLOCKY, 1, 0, s->cu_stream, args_uchar, NULL));
435 }
436 
438  AVFrame *out, AVFrame *in)
439 {
440  CUDAScaleContext *s = ctx->priv;
441  CudaFunctions *cu = s->hwctx->internal->cuda_dl;
442  CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx;
443  int i, ret;
444 
445  CUtexObject tex[4] = { 0, 0, 0, 0 };
446 
447  ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
448  if (ret < 0)
449  return ret;
450 
451  for (i = 0; i < s->in_planes; i++) {
452  CUDA_TEXTURE_DESC tex_desc = {
453  .filterMode = s->interp_use_linear ?
454  CU_TR_FILTER_MODE_LINEAR :
455  CU_TR_FILTER_MODE_POINT,
456  .flags = s->interp_as_integer ? CU_TRSF_READ_AS_INTEGER : 0,
457  };
458 
459  CUDA_RESOURCE_DESC res_desc = {
460  .resType = CU_RESOURCE_TYPE_PITCH2D,
461  .res.pitch2D.format = s->in_plane_depths[i] <= 8 ?
462  CU_AD_FORMAT_UNSIGNED_INT8 :
463  CU_AD_FORMAT_UNSIGNED_INT16,
464  .res.pitch2D.numChannels = s->in_plane_channels[i],
465  .res.pitch2D.pitchInBytes = in->linesize[i],
466  .res.pitch2D.devPtr = (CUdeviceptr)in->data[i],
467  };
468 
469  if (i == 1 || i == 2) {
470  res_desc.res.pitch2D.width = AV_CEIL_RSHIFT(in->width, s->in_desc->log2_chroma_w);
471  res_desc.res.pitch2D.height = AV_CEIL_RSHIFT(in->height, s->in_desc->log2_chroma_h);
472  } else {
473  res_desc.res.pitch2D.width = in->width;
474  res_desc.res.pitch2D.height = in->height;
475  }
476 
477  ret = CHECK_CU(cu->cuTexObjectCreate(&tex[i], &res_desc, &tex_desc, NULL));
478  if (ret < 0)
479  goto exit;
480  }
481 
482  // scale primary plane(s). Usually Y (and A), or single plane of RGB frames.
483  ret = call_resize_kernel(ctx, s->cu_func,
484  tex, in->width, in->height,
485  out, out->width, out->height, out->linesize[0]);
486  if (ret < 0)
487  goto exit;
488 
489  if (s->out_planes > 1) {
490  // scale UV plane. Scale function sets both U and V plane, or singular interleaved plane.
491  ret = call_resize_kernel(ctx, s->cu_func_uv, tex,
492  AV_CEIL_RSHIFT(in->width, s->in_desc->log2_chroma_w),
493  AV_CEIL_RSHIFT(in->height, s->in_desc->log2_chroma_h),
494  out,
495  AV_CEIL_RSHIFT(out->width, s->out_desc->log2_chroma_w),
496  AV_CEIL_RSHIFT(out->height, s->out_desc->log2_chroma_h),
497  out->linesize[1]);
498  if (ret < 0)
499  goto exit;
500  }
501 
502 exit:
503  for (i = 0; i < s->in_planes; i++)
504  if (tex[i])
505  CHECK_CU(cu->cuTexObjectDestroy(tex[i]));
506 
507  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
508 
509  return ret;
510 }
511 
513 {
514  CUDAScaleContext *s = ctx->priv;
515  AVFilterLink *outlink = ctx->outputs[0];
516  AVFrame *src = in;
517  int ret;
518 
519  ret = scalecuda_resize(ctx, s->frame, src);
520  if (ret < 0)
521  return ret;
522 
523  src = s->frame;
524  ret = av_hwframe_get_buffer(src->hw_frames_ctx, s->tmp_frame, 0);
525  if (ret < 0)
526  return ret;
527 
528  av_frame_move_ref(out, s->frame);
529  av_frame_move_ref(s->frame, s->tmp_frame);
530 
531  s->frame->width = outlink->w;
532  s->frame->height = outlink->h;
533 
534  ret = av_frame_copy_props(out, in);
535  if (ret < 0)
536  return ret;
537 
538  return 0;
539 }
540 
542 {
543  AVFilterContext *ctx = link->dst;
544  CUDAScaleContext *s = ctx->priv;
545  AVFilterLink *outlink = ctx->outputs[0];
546  CudaFunctions *cu = s->hwctx->internal->cuda_dl;
547 
548  AVFrame *out = NULL;
549  CUcontext dummy;
550  int ret = 0;
551 
552  if (s->passthrough)
553  return ff_filter_frame(outlink, in);
554 
555  out = av_frame_alloc();
556  if (!out) {
557  ret = AVERROR(ENOMEM);
558  goto fail;
559  }
560 
561  ret = CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx));
562  if (ret < 0)
563  goto fail;
564 
565  ret = cudascale_scale(ctx, out, in);
566 
567  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
568  if (ret < 0)
569  goto fail;
570 
571  av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
572  (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
573  (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
574  INT_MAX);
575 
576  av_frame_free(&in);
577  return ff_filter_frame(outlink, out);
578 fail:
579  av_frame_free(&in);
580  av_frame_free(&out);
581  return ret;
582 }
583 
585 {
586  CUDAScaleContext *s = inlink->dst->priv;
587 
588  return s->passthrough ?
591 }
592 
/* Byte offset of a CUDAScaleContext member, for the option table below. */
#define OFFSET(x) offsetof(CUDAScaleContext, x)
/* Flags shared by every option of this filter. */
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
/* User-visible options; named constants are grouped by their unit string ("interp_algo", "force_oar"). */
static const AVOption options[] = {
    { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS },
    { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS },
    { "interp_algo", "Interpolation algorithm used for resizing", OFFSET(interp_algo), AV_OPT_TYPE_INT, { .i64 = INTERP_ALGO_DEFAULT }, 0, INTERP_ALGO_COUNT - 1, FLAGS, "interp_algo" },
    { "nearest", "nearest neighbour", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_NEAREST }, 0, 0, FLAGS, "interp_algo" },
    { "bilinear", "bilinear", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BILINEAR }, 0, 0, FLAGS, "interp_algo" },
    { "bicubic", "bicubic", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BICUBIC }, 0, 0, FLAGS, "interp_algo" },
    { "lanczos", "lanczos", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_LANCZOS }, 0, 0, FLAGS, "interp_algo" },
    { "format", "Output video pixel format", OFFSET(format), AV_OPT_TYPE_PIXEL_FMT, { .i64 = AV_PIX_FMT_NONE }, INT_MIN, INT_MAX, .flags=FLAGS },
    { "passthrough", "Do not process frames at all if parameters match", OFFSET(passthrough), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS },
    { "param", "Algorithm-Specific parameter", OFFSET(param), AV_OPT_TYPE_FLOAT, { .dbl = SCALE_CUDA_PARAM_DEFAULT }, -FLT_MAX, FLT_MAX, FLAGS },
    { "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 2, FLAGS, "force_oar" },
    { "disable", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, 0, 0, FLAGS, "force_oar" },
    { "decrease", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, 0, 0, FLAGS, "force_oar" },
    { "increase", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2 }, 0, 0, FLAGS, "force_oar" },
    { "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 256, FLAGS },
    { NULL },
};
613 
/* AVClass of the filter's private context; ties the option table to it. */
static const AVClass cudascale_class = {
    .class_name = "cudascale",
    .item_name = av_default_item_name,
    .option = options,
    .version = LIBAVUTIL_VERSION_INT,
};
620 
/* Single video input pad: frames go to cudascale_filter_frame(), buffers come from cudascale_get_video_buffer(). */
static const AVFilterPad cudascale_inputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_VIDEO,
        .filter_frame = cudascale_filter_frame,
        .get_buffer.video = cudascale_get_video_buffer,
    },
};
629 
/* Single video output pad, configured by cudascale_config_props(). */
static const AVFilterPad cudascale_outputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_VIDEO,
        .config_props = cudascale_config_props,
    },
};
637 
639  .name = "scale_cuda",
640  .description = NULL_IF_CONFIG_SMALL("GPU accelerated video resizer"),
641 
642  .init = cudascale_init,
643  .uninit = cudascale_uninit,
644  .query_formats = cudascale_query_formats,
645 
646  .priv_size = sizeof(CUDAScaleContext),
647  .priv_class = &cudascale_class,
648 
651 
652  .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
653 };
options
static const AVOption options[]
Definition: vf_scale_cuda.c:595
func
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:68
AVHWDeviceContext::hwctx
void * hwctx
The format-specific data, allocated and freed by libavutil along with this context.
Definition: hwcontext.h:92
AV_PIX_FMT_CUDA
@ AV_PIX_FMT_CUDA
HW acceleration through CUDA.
Definition: pixfmt.h:225
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
CUDAScaleContext::frames_ctx
AVBufferRef * frames_ctx
Definition: vf_scale_cuda.c:84
hwcontext_cuda_internal.h
cudascale_init
static av_cold int cudascale_init(AVFilterContext *ctx)
Definition: vf_scale_cuda.c:114
out
FILE * out
Definition: movenc.c:54
FF_FILTER_FLAG_HWFRAME_AWARE
#define FF_FILTER_FLAG_HWFRAME_AWARE
The filter is aware of hardware frames, and any hardware frame context should not be automatically pr...
Definition: internal.h:334
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1019
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2540
AVBufferRef::data
uint8_t * data
The data buffer.
Definition: buffer.h:90
AVHWFramesContext::format
enum AVPixelFormat format
The pixel format identifying the underlying HW surface type.
Definition: hwcontext.h:209
ff_cuda_load_module
int ff_cuda_load_module(void *avctx, AVCUDADeviceContext *hwctx, CUmodule *cu_module, const unsigned char *data, const unsigned int length)
Loads a CUDA module and applies any decompression, if necessary.
Definition: load_helper.c:34
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
CUDAScaleContext::tmp_frame
AVFrame * tmp_frame
Definition: vf_scale_cuda.c:87
CUDAScaleContext::passthrough
int passthrough
Definition: vf_scale_cuda.c:88
cudascale_uninit
static av_cold void cudascale_uninit(AVFilterContext *ctx)
Definition: vf_scale_cuda.c:129
av_frame_free
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:112
av_hwframe_ctx_init
int av_hwframe_ctx_init(AVBufferRef *ref)
Finalize the context before use.
Definition: hwcontext.c:333
CUDAScaleContext::w_expr
char * w_expr
width expression string
Definition: vf_scale_cuda.c:95
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:303
pixdesc.h
AVFrame::width
int width
Definition: frame.h:361
w
uint8_t w
Definition: llviddspenc.c:38
av_hwframe_ctx_alloc
AVBufferRef * av_hwframe_ctx_alloc(AVBufferRef *device_ref_in)
Allocate an AVHWFramesContext tied to a given device context.
Definition: hwcontext.c:247
AVOption
AVOption.
Definition: opt.h:247
init_hwframe_ctx
static av_cold int init_hwframe_ctx(CUDAScaleContext *s, AVBufferRef *device_ctx, int width, int height)
Definition: vf_scale_cuda.c:157
CUDAScaleContext::interp_use_linear
int interp_use_linear
Definition: vf_scale_cuda.c:108
FLAGS
#define FLAGS
Definition: vf_scale_cuda.c:594
AV_LOG_VERBOSE
#define AV_LOG_VERBOSE
Detailed information.
Definition: log.h:196
ff_scale_eval_dimensions
int ff_scale_eval_dimensions(void *log_ctx, const char *w_expr, const char *h_expr, AVFilterLink *inlink, AVFilterLink *outlink, int *ret_w, int *ret_h)
Parse and evaluate string expressions for width and height.
Definition: scale_eval.c:57
float.h
DIV_UP
#define DIV_UP(a, b)
Definition: vf_scale_cuda.c:56
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
av_buffer_ref
AVBufferRef * av_buffer_ref(const AVBufferRef *buf)
Create a new reference to an AVBuffer.
Definition: buffer.c:103
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:153
AVHWFramesContext::width
int width
The allocated dimensions of the frames in this pool.
Definition: hwcontext.h:229
video.h
AVFrame::data
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:317
CUDAScaleContext::frame
AVFrame * frame
Definition: vf_scale_cuda.c:85
INTERP_ALGO_BILINEAR
@ INTERP_ALGO_BILINEAR
Definition: vf_scale_cuda.c:66
formats.h
ff_default_get_video_buffer
AVFrame * ff_default_get_video_buffer(AVFilterLink *link, int w, int h)
Definition: video.c:43
av_pix_fmt_count_planes
int av_pix_fmt_count_planes(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2580
INTERP_ALGO_NEAREST
@ INTERP_ALGO_NEAREST
Definition: vf_scale_cuda.c:65
CUDAScaleContext::cu_func_uv
CUfunction cu_func_uv
Definition: vf_scale_cuda.c:104
vf_scale_cuda.h
fail
#define fail()
Definition: checkasm.h:127
CHECK_CU
#define CHECK_CU(x)
Definition: vf_scale_cuda.c:60
CUDAScaleContext::in_planes
int in_planes
Definition: vf_scale_cuda.c:80
scalecuda_resize
static int scalecuda_resize(AVFilterContext *ctx, AVFrame *out, AVFrame *in)
Definition: vf_scale_cuda.c:437
CUDAScaleContext::in_plane_channels
int in_plane_channels[4]
Definition: vf_scale_cuda.c:82
av_reduce
int av_reduce(int *dst_num, int *dst_den, int64_t num, int64_t den, int64_t max)
Reduce a fraction.
Definition: rational.c:35
AVRational::num
int num
Numerator.
Definition: rational.h:59
cudascale_load_functions
static av_cold int cudascale_load_functions(AVFilterContext *ctx)
Definition: vf_scale_cuda.c:286
AVFilterPad
A filter pad used for either input or output.
Definition: internal.h:50
av_frame_alloc
AVFrame * av_frame_alloc(void)
Allocate an AVFrame and set its fields to default values.
Definition: frame.c:99
cudascale_class
static const AVClass cudascale_class
Definition: vf_scale_cuda.c:614
cudascale_config_props
static av_cold int cudascale_config_props(AVFilterLink *outlink)
Definition: vf_scale_cuda.c:357
CUDAScaleContext::hwctx
AVCUDADeviceContext * hwctx
Definition: vf_scale_cuda.c:76
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:180
set_format_info
static av_cold void set_format_info(AVFilterContext *ctx, enum AVPixelFormat in_format, enum AVPixelFormat out_format)
Definition: vf_scale_cuda.c:204
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
av_cold
#define av_cold
Definition: attributes.h:90
AVHWFramesContext::height
int height
Definition: hwcontext.h:229
CUDAScaleContext::interp_as_integer
int interp_as_integer
Definition: vf_scale_cuda.c:109
width
#define width
s
#define s(width, name)
Definition: cbs_vp9.c:257
AV_PIX_FMT_YUV444P16
#define AV_PIX_FMT_YUV444P16
Definition: pixfmt.h:402
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:51
AV_PIX_FMT_0BGR32
#define AV_PIX_FMT_0BGR32
Definition: pixfmt.h:367
INTERP_ALGO_DEFAULT
@ INTERP_ALGO_DEFAULT
Definition: vf_scale_cuda.c:63
CUDAScaleContext::cu_stream
CUstream cu_stream
Definition: vf_scale_cuda.c:105
ff_set_common_formats_from_list
int ff_set_common_formats_from_list(AVFilterContext *ctx, const int *fmts)
Equivalent to ff_set_common_formats(ctx, ff_make_format_list(fmts))
Definition: formats.c:703
ctx
AVFormatContext * ctx
Definition: movenc.c:48
load_helper.h
AV_PIX_FMT_YUV420P
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:66
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: internal.h:152
format_is_supported
static int format_is_supported(enum AVPixelFormat fmt)
Definition: vf_scale_cuda.c:194
link
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a link
Definition: filter_design.txt:23
CUDAScaleContext::param
float param
Definition: vf_scale_cuda.c:111
if
if(ret)
Definition: filter_design.txt:179
CUDAScaleContext::force_divisible_by
int force_divisible_by
Definition: vf_scale_cuda.c:99
LIBAVUTIL_VERSION_INT
#define LIBAVUTIL_VERSION_INT
Definition: version.h:85
CUDAScaleContext::interp_algo
int interp_algo
Definition: vf_scale_cuda.c:107
AVClass
Describe the class of an AVClass context structure.
Definition: log.h:66
OFFSET
#define OFFSET(x)
Definition: vf_scale_cuda.c:593
NULL
#define NULL
Definition: coverity.c:32
AVHWFramesContext::sw_format
enum AVPixelFormat sw_format
The pixel format identifying the actual data layout of the hardware frames.
Definition: hwcontext.h:222
av_frame_copy_props
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:537
av_buffer_unref
void av_buffer_unref(AVBufferRef **buf)
Free a given reference and automatically free the buffer if there are no more references to it.
Definition: buffer.c:139
CUDAScaleContext::out_desc
const AVPixFmtDescriptor * out_desc
Definition: vf_scale_cuda.c:79
CUDAScaleContext::h_expr
char * h_expr
height expression string
Definition: vf_scale_cuda.c:96
BLOCKY
#define BLOCKY
Definition: vf_scale_cuda.c:58
AVRational
Rational number (pair of numerator and denominator).
Definition: rational.h:58
AVHWFramesContext::device_ref
AVBufferRef * device_ref
A reference to the parent AVHWDeviceContext.
Definition: hwcontext.h:141
AVFilterContext::inputs
AVFilterLink ** inputs
array of pointers to input links
Definition: avfilter.h:354
av_default_item_name
const char * av_default_item_name(void *ptr)
Return the context name.
Definition: log.c:235
src
#define src
Definition: vp8dsp.c:255
cudascale_inputs
static const AVFilterPad cudascale_inputs[]
Definition: vf_scale_cuda.c:621
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:117
CUDAScaleContext::cu_func
CUfunction cu_func
Definition: vf_scale_cuda.c:103
INTERP_ALGO_LANCZOS
@ INTERP_ALGO_LANCZOS
Definition: vf_scale_cuda.c:68
cudascale_query_formats
static int cudascale_query_formats(AVFilterContext *ctx)
Definition: vf_scale_cuda.c:148
scale_eval.h
height
#define height
INTERP_ALGO_COUNT
@ INTERP_ALGO_COUNT
Definition: vf_scale_cuda.c:70
CUDAScaleContext::cu_module
CUmodule cu_module
Definition: vf_scale_cuda.c:102
ff_null_get_video_buffer
AVFrame * ff_null_get_video_buffer(AVFilterLink *link, int w, int h)
Definition: video.c:38
internal.h
CUDAScaleContext
Definition: vf_scale_cuda.c:73
AV_OPT_TYPE_FLOAT
@ AV_OPT_TYPE_FLOAT
Definition: opt.h:227
supported_formats
static enum AVPixelFormat supported_formats[]
Definition: vf_scale_cuda.c:45
i
int i
Definition: input.c:406
internal.h
common.h
call_resize_kernel
static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, CUtexObject src_tex[4], int src_width, int src_height, AVFrame *out_frame, int dst_width, int dst_height, int dst_pitch)
Definition: vf_scale_cuda.c:413
av_frame_move_ref
void av_frame_move_ref(AVFrame *dst, AVFrame *src)
Move everything contained in src to dst and reset src.
Definition: frame.c:461
av_frame_unref
void av_frame_unref(AVFrame *frame)
Unreference all the buffers referenced by frame and reset the frame fields.
Definition: frame.c:437
AVFilterPad::name
const char * name
Pad name.
Definition: internal.h:56
AV_PIX_FMT_P016
#define AV_PIX_FMT_P016
Definition: pixfmt.h:439
CUDAScaleContext::in_plane_depths
int in_plane_depths[4]
Definition: vf_scale_cuda.c:81
AVFilter
Filter definition.
Definition: avfilter.h:149
AVHWFramesContext
This struct describes a set or pool of "hardware" frames (i.e.
Definition: hwcontext.h:124
AVCUDADeviceContext
This struct is allocated as AVHWDeviceContext.hwctx.
Definition: hwcontext_cuda.h:42
BLOCKX
#define BLOCKX
Definition: vf_scale_cuda.c:57
CUDAScaleContext::out_fmt
enum AVPixelFormat in_fmt, out_fmt
Definition: vf_scale_cuda.c:78
ret
ret
Definition: filter_design.txt:187
AV_LOG_FATAL
#define AV_LOG_FATAL
Something went wrong and recovery is not possible.
Definition: log.h:174
AV_PIX_FMT_NV12
@ AV_PIX_FMT_NV12
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V)
Definition: pixfmt.h:89
AVClass::class_name
const char * class_name
The name of the class; usually it is the same name as the context structure type to which the AVClass is associated.
Definition: log.h:71
AV_PIX_FMT_0RGB32
#define AV_PIX_FMT_0RGB32
Definition: pixfmt.h:366
AVHWFramesContext::device_ctx
AVHWDeviceContext * device_ctx
The parent AVHWDeviceContext.
Definition: hwcontext.h:149
cuda_check.h
AVFrame::sample_aspect_ratio
AVRational sample_aspect_ratio
Sample aspect ratio for the video frame, 0/1 if unknown/unspecified.
Definition: frame.h:391
ff_scale_adjust_dimensions
int ff_scale_adjust_dimensions(AVFilterLink *inlink, int *ret_w, int *ret_h, int force_original_aspect_ratio, int force_divisible_by)
Transform evaluated width and height obtained from ff_scale_eval_dimensions into actual target width and height for scaling.
Definition: scale_eval.c:113
AVFrame::height
int height
Definition: frame.h:361
cudascale_get_video_buffer
static AVFrame * cudascale_get_video_buffer(AVFilterLink *inlink, int w, int h)
Definition: vf_scale_cuda.c:584
AVRational::den
int den
Denominator.
Definition: rational.h:60
AV_PIX_FMT_NONE
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:65
dummy
int dummy
Definition: motion.c:65
AV_OPT_TYPE_INT
@ AV_OPT_TYPE_INT
Definition: opt.h:224
avfilter.h
AV_OPT_TYPE_PIXEL_FMT
@ AV_OPT_TYPE_PIXEL_FMT
Definition: opt.h:235
av_mul_q
AVRational av_mul_q(AVRational b, AVRational c)
Multiply two rationals.
Definition: rational.c:80
AV_PIX_FMT_YUV444P
@ AV_PIX_FMT_YUV444P
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:71
AVFilterContext
An instance of a filter.
Definition: avfilter.h:346
AV_PIX_FMT_P010
#define AV_PIX_FMT_P010
Definition: pixfmt.h:438
cudascale_outputs
static const AVFilterPad cudascale_outputs[]
Definition: vf_scale_cuda.c:630
AVMEDIA_TYPE_VIDEO
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:201
AVBufferRef
A reference to a data buffer.
Definition: buffer.h:82
cudascale_scale
static int cudascale_scale(AVFilterContext *ctx, AVFrame *out, AVFrame *in)
Definition: vf_scale_cuda.c:512
cudascale_filter_frame
static int cudascale_filter_frame(AVFilterLink *link, AVFrame *in)
Definition: vf_scale_cuda.c:541
INTERP_ALGO_BICUBIC
@ INTERP_ALGO_BICUBIC
Definition: vf_scale_cuda.c:67
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes of an image.
Definition: pixdesc.h:69
CUDAScaleContext::force_original_aspect_ratio
int force_original_aspect_ratio
Definition: vf_scale_cuda.c:98
CUDAScaleContext::format
enum AVPixelFormat format
Output sw format.
Definition: vf_scale_cuda.c:93
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
AV_OPT_TYPE_BOOL
@ AV_OPT_TYPE_BOOL
Definition: opt.h:241
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: internal.h:153
format
fg outputs[0] format
Definition: ffmpeg_filter.c:175
SCALE_CUDA_PARAM_DEFAULT
#define SCALE_CUDA_PARAM_DEFAULT
Definition: vf_scale_cuda.h:26
d
d
Definition: ffmpeg_filter.c:156
CUDAScaleContext::in_desc
const AVPixFmtDescriptor * in_desc
Definition: vf_scale_cuda.c:79
init_processing_chain
static av_cold int init_processing_chain(AVFilterContext *ctx, int in_width, int in_height, int out_width, int out_height)
Definition: vf_scale_cuda.c:230
hwcontext.h
AVERROR_BUG
#define AVERROR_BUG
Internal bug, also see AVERROR_BUG2.
Definition: error.h:52
AVFrame::linesize
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
Definition: frame.h:334
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:28
h
h
Definition: vp9dsp_template.c:2038
avstring.h
AV_OPT_TYPE_STRING
@ AV_OPT_TYPE_STRING
Definition: opt.h:228
av_hwframe_get_buffer
int av_hwframe_get_buffer(AVBufferRef *hwframe_ref, AVFrame *frame, int flags)
Allocate a new frame attached to the given AVHWFramesContext.
Definition: hwcontext.c:502
AV_OPT_TYPE_CONST
@ AV_OPT_TYPE_CONST
Definition: opt.h:233
snprintf
#define snprintf
Definition: snprintf.h:34
ff_vf_scale_cuda
const AVFilter ff_vf_scale_cuda
Definition: vf_scale_cuda.c:638
av_get_pix_fmt_name
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
Definition: pixdesc.c:2460
CUDAScaleContext::out_planes
int out_planes
Definition: vf_scale_cuda.c:80
CUDAScaleContext::cu_ctx
CUcontext cu_ctx
Definition: vf_scale_cuda.c:101