FFmpeg
vf_transpose_cuda.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2026 NyanMisaka
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/avassert.h"
22 #include "libavutil/common.h"
23 #include "libavutil/hwcontext.h"
25 #include "libavutil/cuda_check.h"
26 #include "libavutil/internal.h"
27 #include "libavutil/opt.h"
28 #include "libavutil/pixdesc.h"
29 
30 #include "avfilter.h"
31 #include "filters.h"
32 #include "transpose.h"
33 #include "video.h"
34 
35 #include "cuda/load_helper.h"
36 
37 #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
38 #define BLOCK_X 32
39 #define BLOCK_Y 16
40 
41 #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x)
42 
43 static const enum AVPixelFormat supported_formats[] = {
65 };
66 
67 typedef struct TransposeCUDAContext {
68  const AVClass *class;
69 
74 
76 
77  CUcontext cu_ctx;
78  CUmodule cu_module;
79  CUfunction cu_func_uchar;
80  CUfunction cu_func_ushort;
81  CUfunction cu_func_uchar2;
82  CUfunction cu_func_ushort2;
83  CUfunction cu_func_uchar4;
84  CUstream cu_stream;
85 
86  int flip_wh;
87  int passthrough; ///< PassthroughType, landscape passthrough mode enabled
88  int dir; ///< TransposeDir
90 
92 {
93  TransposeCUDAContext *s = ctx->priv;
94 
95  s->frame = av_frame_alloc();
96  if (!s->frame)
97  return AVERROR(ENOMEM);
98 
99  s->tmp_frame = av_frame_alloc();
100  if (!s->tmp_frame)
101  return AVERROR(ENOMEM);
102 
103  return 0;
104 }
105 
107 {
108  TransposeCUDAContext *s = ctx->priv;
109 
110  if (s->hwctx && s->cu_module) {
111  CUcontext dummy;
112  CudaFunctions *cu = s->hwctx->internal->cuda_dl;
113  CHECK_CU(cu->cuCtxPushCurrent(s->cu_ctx));
114  CHECK_CU(cu->cuModuleUnload(s->cu_module));
115  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
116  }
117 
118  av_frame_free(&s->frame);
119  av_buffer_unref(&s->frames_ctx);
120  av_frame_free(&s->tmp_frame);
121 }
122 
124  AVBufferRef *device_ctx,
125  int width, int height,
126  enum AVPixelFormat sw_format)
127 {
128  AVBufferRef *out_ref = NULL;
129  AVHWFramesContext *out_ctx;
130  int ret;
131 
132  out_ref = av_hwframe_ctx_alloc(device_ctx);
133  if (!out_ref)
134  return AVERROR(ENOMEM);
135  out_ctx = (AVHWFramesContext*)out_ref->data;
136 
137  out_ctx->format = AV_PIX_FMT_CUDA;
138  out_ctx->sw_format = sw_format;
139  out_ctx->width = FFALIGN(width, 32);
140  out_ctx->height = FFALIGN(height, 32);
141 
142  ret = av_hwframe_ctx_init(out_ref);
143  if (ret < 0)
144  goto fail;
145 
146  av_frame_unref(s->frame);
147  ret = av_hwframe_get_buffer(out_ref, s->frame, 0);
148  if (ret < 0)
149  goto fail;
150 
151  s->frame->width = width;
152  s->frame->height = height;
153 
154  av_buffer_unref(&s->frames_ctx);
155  s->frames_ctx = out_ref;
156 
157  return 0;
158 fail:
159  av_buffer_unref(&out_ref);
160  return ret;
161 }
162 
163 static int format_is_supported(enum AVPixelFormat fmt)
164 {
165  for (int i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
166  if (supported_formats[i] == fmt)
167  return 1;
168  return 0;
169 }
170 
172  int out_width, int out_height)
173 {
174  FilterLink *inl = ff_filter_link(ctx->inputs[0]);
175  FilterLink *outl = ff_filter_link(ctx->outputs[0]);
176  TransposeCUDAContext *s = ctx->priv;
177  AVHWFramesContext *in_frames_ctx;
178  enum AVPixelFormat format;
179  int ret;
180 
181  /* check that we have a hw context */
182  if (!inl->hw_frames_ctx) {
183  av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n");
184  return AVERROR(EINVAL);
185  }
186 
187  in_frames_ctx = (AVHWFramesContext*)inl->hw_frames_ctx->data;
188  format = in_frames_ctx->sw_format;
189  s->pix_desc = av_pix_fmt_desc_get(format);
190 
191  if (!format_is_supported(format)) {
192  av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n",
194  return AVERROR(ENOSYS);
195  }
196 
197  ret = init_hwframe_ctx(s, in_frames_ctx->device_ref,
198  out_width, out_height, format);
199  if (ret < 0)
200  return ret;
201 
202  s->hwctx = in_frames_ctx->device_ctx->hwctx;
203  s->cu_stream = s->hwctx->stream;
204 
205  outl->hw_frames_ctx = av_buffer_ref(s->frames_ctx);
206  if (!outl->hw_frames_ctx)
207  return AVERROR(ENOMEM);
208 
209  return 0;
210 }
211 
213 {
214  extern const unsigned char ff_vf_transpose_cuda_ptx_data[];
215  extern const unsigned int ff_vf_transpose_cuda_ptx_len;
216  FilterLink *outl = ff_filter_link(outlink);
217  AVFilterContext *ctx = outlink->src;
218  AVFilterLink *inlink = ctx->inputs[0];
220  TransposeCUDAContext *s = ctx->priv;
221  CUcontext dummy, cuda_ctx;
222  CudaFunctions *cu;
223  int ret = 0;
224 
225  if ((inlink->w >= inlink->h && s->passthrough == TRANSPOSE_PT_TYPE_LANDSCAPE) ||
226  (inlink->w <= inlink->h && s->passthrough == TRANSPOSE_PT_TYPE_PORTRAIT)) {
227  if (inl->hw_frames_ctx) {
229  if (!outl->hw_frames_ctx)
230  return AVERROR(ENOMEM);
231  }
232 
234  "w:%d h:%d -> w:%d h:%d (passthrough mode)\n",
235  inlink->w, inlink->h, inlink->w, inlink->h);
236  return 0;
237  } else {
238  s->passthrough = TRANSPOSE_PT_TYPE_NONE;
239  }
240 
241  switch (s->dir) {
243  case TRANSPOSE_CCLOCK:
244  case TRANSPOSE_CLOCK:
246  outlink->w = inlink->h;
247  outlink->h = inlink->w;
248  s->flip_wh = 1;
249  break;
250  default:
251  outlink->w = inlink->w;
252  outlink->h = inlink->h;
253  s->flip_wh = 0;
254  break;
255  }
256 
257  if (s->flip_wh && inlink->sample_aspect_ratio.num)
258  outlink->sample_aspect_ratio = av_inv_q(inlink->sample_aspect_ratio);
259  else
260  outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
261 
262  ret = init_processing_chain(ctx, outlink->w, outlink->h);
263  if (ret < 0)
264  return ret;
265 
266  cuda_ctx = s->cu_ctx = s->hwctx->cuda_ctx;
267  cu = s->hwctx->internal->cuda_dl;
268 
269  ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
270  if (ret < 0)
271  return ret;
272 
273  ret = ff_cuda_load_module(ctx, s->hwctx, &s->cu_module,
274  ff_vf_transpose_cuda_ptx_data, ff_vf_transpose_cuda_ptx_len);
275  if (ret < 0)
276  goto exit;
277 
278  ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Transpose_Cuda_uchar"));
279  if (ret < 0)
280  goto exit;
281 
282  ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Transpose_Cuda_ushort"));
283  if (ret < 0)
284  goto exit;
285 
286  ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Transpose_Cuda_uchar2"));
287  if (ret < 0)
288  goto exit;
289 
290  ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Transpose_Cuda_ushort2"));
291  if (ret < 0)
292  goto exit;
293 
294  ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar4, s->cu_module, "Transpose_Cuda_uchar4"));
295  if (ret < 0)
296  goto exit;
297 
299  "w:%d h:%d dir:%d -> w:%d h:%d\n",
300  inlink->w, inlink->h, s->dir, outlink->w, outlink->h);
301 exit:
302  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
303 
304  return ret;
305 }
306 
307 static CUresult call_kernel(AVFilterContext *ctx,
308  CUfunction cu_func,
309  CUarray_format cu_format,
310  int channels,
311  int is_422_uv, // Dst* & Src* are 4:2:2 UV planes
312  CUdeviceptr dst0,
313  CUdeviceptr dst1, // Dst1 is for fully planar V, optional
314  int dst_width, // Width is pixels per channel
315  int dst_height, // Height is pixels per channel
316  int dst_pitch, // Pitch is elements per channel
317  CUdeviceptr src0,
318  CUdeviceptr src1, // Src1 is for fully planar V, optional
319  int src_width, // Width is pixels per channel
320  int src_height, // Height is pixels per channel
321  int src_pitch)
322 {
323  TransposeCUDAContext *s = ctx->priv;
324  CudaFunctions *cu = s->hwctx->internal->cuda_dl;
325  CUtexObject src0_tex = 0, src1_tex = 0;
326  int ret;
327 
328  void *kernel_args[] = {
329  &dst0, &dst1, &dst_width, &dst_height, &dst_pitch,
330  &src0_tex, &src1_tex, &s->dir,
331  };
332 
333  CUDA_TEXTURE_DESC tex_desc = {
334  .addressMode = { CU_TR_ADDRESS_MODE_CLAMP,
335  CU_TR_ADDRESS_MODE_CLAMP },
336  .filterMode = is_422_uv ? CU_TR_FILTER_MODE_LINEAR
337  : CU_TR_FILTER_MODE_POINT,
338  .flags = 2 /* CU_TRSF_NORMALIZED_COORDINATES */
339  };
340  CUDA_RESOURCE_DESC res_desc = {
341  .resType = CU_RESOURCE_TYPE_PITCH2D,
342  .res.pitch2D.format = cu_format,
343  .res.pitch2D.numChannels = channels,
344  .res.pitch2D.pitchInBytes = src_pitch,
345  .res.pitch2D.width = src_width,
346  .res.pitch2D.height = src_height
347  };
348 
349  res_desc.res.pitch2D.devPtr = (CUdeviceptr)src0;
350  ret = CHECK_CU(cu->cuTexObjectCreate(&src0_tex, &res_desc, &tex_desc, NULL));
351  if (ret < 0)
352  goto exit;
353 
354  if (src1) {
355  res_desc.res.pitch2D.devPtr = (CUdeviceptr)src1;
356  ret = CHECK_CU(cu->cuTexObjectCreate(&src1_tex, &res_desc, &tex_desc, NULL));
357  if (ret < 0)
358  goto exit;
359  }
360 
361  ret = CHECK_CU(cu->cuLaunchKernel(cu_func,
362  DIV_UP(dst_width, BLOCK_X), DIV_UP(dst_height, BLOCK_Y), 1,
363  BLOCK_X, BLOCK_Y, 1, 0, s->cu_stream, kernel_args, NULL));
364 exit:
365  if (src0_tex)
366  CHECK_CU(cu->cuTexObjectDestroy(src0_tex));
367  if (src1_tex)
368  CHECK_CU(cu->cuTexObjectDestroy(src1_tex));
369 
370  return ret;
371 }
372 
374  AVFrame *out, AVFrame *in)
375 {
376  TransposeCUDAContext *s = ctx->priv;
377  int ret;
378 
379  for (int c = 0; c < s->pix_desc->nb_components; c++) {
380  const AVComponentDescriptor *comp = &s->pix_desc->comp[c];
381  const int p = comp->plane;
382  int pix_size, channels;
383  int is_planar_u, is_planar_v, is_422_uv;
384  CUfunction func;
385  CUarray_format format;
386 
387  pix_size = (comp->depth + 7) / 8;
388  channels = comp->step / pix_size;
389  if (pix_size > 2 || channels > 4)
390  av_unreachable("Unsupported pixel format!");
391 
392  is_planar_u = p == 1 && channels == 1;
393  is_planar_v = p == 2 && channels == 1;
394  is_422_uv = p && s->pix_desc->log2_chroma_w == 1 && !s->pix_desc->log2_chroma_h;
395 
396  if (comp->plane < c || is_planar_v) {
397  // We process planes as a whole, so don't reprocess
398  // them for additional components
399  continue;
400  }
401 
402  switch (pix_size) {
403  case 1:
404  func = channels == 4 ? s->cu_func_uchar4 :
405  channels == 2 ? s->cu_func_uchar2 : s->cu_func_uchar;
406  format = CU_AD_FORMAT_UNSIGNED_INT8;
407  break;
408  case 2:
409  func = channels == 2 ? s->cu_func_ushort2 : s->cu_func_ushort;
410  format = CU_AD_FORMAT_UNSIGNED_INT16;
411  break;
412  default:
413  av_unreachable("Unsupported pixel format!");
414  }
415 
416  ret = call_kernel(ctx, func, format, channels, is_422_uv,
417  (CUdeviceptr)out->data[p],
418  (CUdeviceptr)(is_planar_u ? out->data[p+1] : NULL),
419  AV_CEIL_RSHIFT(out->width, p ? s->pix_desc->log2_chroma_w : 0),
420  AV_CEIL_RSHIFT(out->height, p ? s->pix_desc->log2_chroma_h : 0),
421  out->linesize[p] / comp->step,
422  (CUdeviceptr)in->data[p],
423  (CUdeviceptr)(is_planar_u ? in->data[p+1] : NULL),
424  AV_CEIL_RSHIFT(in->width, p ? s->pix_desc->log2_chroma_w : 0),
425  AV_CEIL_RSHIFT(in->height, p ? s->pix_desc->log2_chroma_h : 0),
426  in->linesize[p]);
427  if (ret < 0)
428  return ret;
429  }
430 
431  return 0;
432 }
433 
435  AVFrame *out, AVFrame *in)
436 {
437  TransposeCUDAContext *s = ctx->priv;
438  AVFilterLink *outlink = ctx->outputs[0];
439  int ret;
440 
441  ret = cudatranspose_rotate(ctx, s->frame, in);
442  if (ret < 0)
443  return ret;
444 
445  ret = av_hwframe_get_buffer(s->frame->hw_frames_ctx, s->tmp_frame, 0);
446  if (ret < 0)
447  return ret;
448 
449  av_frame_move_ref(out, s->frame);
450  av_frame_move_ref(s->frame, s->tmp_frame);
451 
452  s->frame->width = outlink->w;
453  s->frame->height = outlink->h;
454 
455  ret = av_frame_copy_props(out, in);
456  if (ret < 0)
457  return ret;
458 
459  if (s->flip_wh && in->sample_aspect_ratio.num)
460  out->sample_aspect_ratio = av_inv_q(in->sample_aspect_ratio);
461  else
462  out->sample_aspect_ratio = in->sample_aspect_ratio;
463 
464  return 0;
465 }
466 
468 {
469  AVFilterContext *ctx = link->dst;
470  TransposeCUDAContext *s = ctx->priv;
471  AVFilterLink *outlink = ctx->outputs[0];
472  CudaFunctions *cu;
473  AVFrame *out = NULL;
474  CUcontext dummy;
475  int ret = 0;
476 
477  if (s->passthrough)
478  return ff_filter_frame(outlink, in);
479 
480  out = av_frame_alloc();
481  if (!out) {
482  ret = AVERROR(ENOMEM);
483  goto fail;
484  }
485 
486  cu = s->hwctx->internal->cuda_dl;
487 
488  ret = CHECK_CU(cu->cuCtxPushCurrent(s->cu_ctx));
489  if (ret < 0)
490  goto fail;
491 
493 
494  CHECK_CU(cu->cuCtxPopCurrent(&dummy));
495  if (ret < 0)
496  goto fail;
497 
498  av_frame_free(&in);
499 
500  return ff_filter_frame(outlink, out);
501 
502 fail:
503  av_frame_free(&in);
504  av_frame_free(&out);
505  return ret;
506 }
507 
509 {
510  TransposeCUDAContext *s = inlink->dst->priv;
511 
512  return s->passthrough ?
515 }
516 
517 #define OFFSET(x) offsetof(TransposeCUDAContext, x)
518 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
519 
520 static const AVOption cudatranspose_options[] = {
521  { "dir", "set transpose direction", OFFSET(dir), AV_OPT_TYPE_INT, { .i64 = TRANSPOSE_CCLOCK_FLIP }, 0, 6, FLAGS, .unit = "dir" },
522  { "cclock_flip", "rotate counter-clockwise with vertical flip", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK_FLIP }, 0, 0, FLAGS, .unit = "dir" },
523  { "clock", "rotate clockwise", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK }, 0, 0, FLAGS, .unit = "dir" },
524  { "cclock", "rotate counter-clockwise", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK }, 0, 0, FLAGS, .unit = "dir" },
525  { "clock_flip", "rotate clockwise with vertical flip", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK_FLIP }, 0, 0, FLAGS, .unit = "dir" },
526  { "reversal", "rotate by half-turn", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_REVERSAL }, 0, 0, FLAGS, .unit = "dir" },
527  { "hflip", "flip horizontally", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_HFLIP }, 0, 0, FLAGS, .unit = "dir" },
528  { "vflip", "flip vertically", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_VFLIP }, 0, 0, FLAGS, .unit = "dir" },
529 
530  { "passthrough", "do not apply transposition if the input matches the specified geometry", OFFSET(passthrough), AV_OPT_TYPE_INT, { .i64 = TRANSPOSE_PT_TYPE_NONE }, 0, 2, FLAGS, .unit = "passthrough" },
531  { "none", "always apply transposition", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_PT_TYPE_NONE }, 0, 0, FLAGS, .unit = "passthrough" },
532  { "landscape", "preserve landscape geometry", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_PT_TYPE_LANDSCAPE }, 0, 0, FLAGS, .unit = "passthrough" },
533  { "portrait", "preserve portrait geometry", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_PT_TYPE_PORTRAIT }, 0, 0, FLAGS, .unit = "passthrough" },
534 
535  { NULL },
536 };
537 
538 AVFILTER_DEFINE_CLASS(cudatranspose);
539 
541  {
542  .name = "default",
543  .type = AVMEDIA_TYPE_VIDEO,
544  .filter_frame = cudatranspose_filter_frame,
545  .get_buffer.video = cudatranspose_get_video_buffer,
546  },
547 };
548 
550  {
551  .name = "default",
552  .type = AVMEDIA_TYPE_VIDEO,
553  .config_props = cudatranspose_config_props,
554  },
555 };
556 
558  .p.name = "transpose_cuda",
559  .p.description = NULL_IF_CONFIG_SMALL("Transpose input video using CUDA"),
560  .p.priv_class = &cudatranspose_class,
561  .init = cudatranspose_init,
562  .uninit = cudatranspose_uninit,
563  .priv_size = sizeof(TransposeCUDAContext),
567  .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
568 };
func
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:66
AVHWDeviceContext::hwctx
void * hwctx
The format-specific data, allocated and freed by libavutil along with this context.
Definition: hwcontext.h:88
call_kernel
static CUresult call_kernel(AVFilterContext *ctx, CUfunction cu_func, CUarray_format cu_format, int channels, int is_422_uv, CUdeviceptr dst0, CUdeviceptr dst1, int dst_width, int dst_height, int dst_pitch, CUdeviceptr src0, CUdeviceptr src1, int src_width, int src_height, int src_pitch)
Definition: vf_transpose_cuda.c:307
AV_PIX_FMT_CUDA
@ AV_PIX_FMT_CUDA
HW acceleration through CUDA.
Definition: pixfmt.h:260
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:71
format_is_supported
static int format_is_supported(enum AVPixelFormat fmt)
Definition: vf_transpose_cuda.c:163
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
TransposeCUDAContext::cu_stream
CUstream cu_stream
Definition: vf_transpose_cuda.c:84
TransposeCUDAContext::cu_func_uchar
CUfunction cu_func_uchar
Definition: vf_transpose_cuda.c:79
hwcontext_cuda_internal.h
out
static FILE * out
Definition: movenc.c:55
cudatranspose_filter_frame
static int cudatranspose_filter_frame(AVFilterLink *link, AVFrame *in)
Definition: vf_transpose_cuda.c:467
comp
static void comp(unsigned char *dst, ptrdiff_t dst_stride, unsigned char *src, ptrdiff_t src_stride, int add)
Definition: eamad.c:79
TransposeCUDAContext::cu_module
CUmodule cu_module
Definition: vf_transpose_cuda.c:78
AV_PIX_FMT_BGR32
#define AV_PIX_FMT_BGR32
Definition: pixfmt.h:513
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1068
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:3456
AVBufferRef::data
uint8_t * data
The data buffer.
Definition: buffer.h:90
TransposeCUDAContext::dir
int dir
TransposeDir.
Definition: vf_transpose_cuda.c:88
AVHWFramesContext::format
enum AVPixelFormat format
The pixel format identifying the underlying HW surface type.
Definition: hwcontext.h:200
src1
const pixel * src1
Definition: h264pred_template.c:420
ff_cuda_load_module
int ff_cuda_load_module(void *avctx, AVCUDADeviceContext *hwctx, CUmodule *cu_module, const unsigned char *data, const unsigned int length)
Loads a CUDA module and applies any decompression, if necessary.
Definition: load_helper.c:34
av_cold
#define av_cold
Definition: attributes.h:119
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
AV_PIX_FMT_YUV444P10MSB
#define AV_PIX_FMT_YUV444P10MSB
Definition: pixfmt.h:554
FF_FILTER_FLAG_HWFRAME_AWARE
#define FF_FILTER_FLAG_HWFRAME_AWARE
The filter is aware of hardware frames, and any hardware frame context should not be automatically pr...
Definition: filters.h:208
av_frame_free
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:64
av_hwframe_ctx_init
int av_hwframe_ctx_init(AVBufferRef *ref)
Finalize the context before use.
Definition: hwcontext.c:337
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:466
pixdesc.h
AVFrame::width
int width
Definition: frame.h:538
av_hwframe_ctx_alloc
AVBufferRef * av_hwframe_ctx_alloc(AVBufferRef *device_ref_in)
Allocate an AVHWFramesContext tied to a given device context.
Definition: hwcontext.c:263
AVOption
AVOption.
Definition: opt.h:428
FILTER_SINGLE_PIXFMT
#define FILTER_SINGLE_PIXFMT(pix_fmt_)
Definition: filters.h:254
filters.h
AV_PIX_FMT_YUV420P10
#define AV_PIX_FMT_YUV420P10
Definition: pixfmt.h:539
AV_LOG_VERBOSE
#define AV_LOG_VERBOSE
Detailed information.
Definition: log.h:226
TRANSPOSE_CLOCK_FLIP
@ TRANSPOSE_CLOCK_FLIP
Definition: transpose.h:34
cudatranspose_get_video_buffer
static AVFrame * cudatranspose_get_video_buffer(AVFilterLink *inlink, int w, int h)
Definition: vf_transpose_cuda.c:508
TransposeCUDAContext::frame
AVFrame * frame
Definition: vf_transpose_cuda.c:72
av_buffer_ref
AVBufferRef * av_buffer_ref(const AVBufferRef *buf)
Create a new reference to an AVBuffer.
Definition: buffer.c:103
TransposeCUDAContext::frames_ctx
AVBufferRef * frames_ctx
Definition: vf_transpose_cuda.c:71
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:219
AVHWFramesContext::width
int width
The allocated dimensions of the frames in this pool.
Definition: hwcontext.h:220
cudatranspose_outputs
static const AVFilterPad cudatranspose_outputs[]
Definition: vf_transpose_cuda.c:549
video.h
TransposeCUDAContext::flip_wh
int flip_wh
Definition: vf_transpose_cuda.c:86
dummy
static int dummy
Definition: ffplay.c:3751
CHECK_CU
#define CHECK_CU(x)
Definition: vf_transpose_cuda.c:41
cudatranspose_inputs
static const AVFilterPad cudatranspose_inputs[]
Definition: vf_transpose_cuda.c:540
AV_PIX_FMT_P212
#define AV_PIX_FMT_P212
Definition: pixfmt.h:618
AV_PIX_FMT_YUV444P12MSB
#define AV_PIX_FMT_YUV444P12MSB
Definition: pixfmt.h:555
AVFrame::data
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:487
TRANSPOSE_CCLOCK
@ TRANSPOSE_CCLOCK
Definition: transpose.h:33
ff_default_get_video_buffer
AVFrame * ff_default_get_video_buffer(AVFilterLink *link, int w, int h)
Definition: video.c:84
supported_formats
static enum AVPixelFormat supported_formats[]
Definition: vf_transpose_cuda.c:43
TransposeCUDAContext::cu_ctx
CUcontext cu_ctx
Definition: vf_transpose_cuda.c:77
cudatranspose_config_props
static int cudatranspose_config_props(AVFilterLink *outlink)
Definition: vf_transpose_cuda.c:212
AVRational::num
int num
Numerator.
Definition: rational.h:59
AVFilterPad
A filter pad used for either input or output.
Definition: filters.h:40
av_frame_alloc
AVFrame * av_frame_alloc(void)
Allocate an AVFrame and set its fields to default values.
Definition: frame.c:52
AV_PIX_FMT_YUV444P10
#define AV_PIX_FMT_YUV444P10
Definition: pixfmt.h:542
TRANSPOSE_HFLIP
@ TRANSPOSE_HFLIP
Definition: transpose.h:36
avassert.h
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:210
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
AVHWFramesContext::height
int height
Definition: hwcontext.h:220
FFFilter
Definition: filters.h:267
s
#define s(width, name)
Definition: cbs_vp9.c:198
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: filters.h:265
AV_PIX_FMT_YUV444P16
#define AV_PIX_FMT_YUV444P16
Definition: pixfmt.h:552
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:60
cudatranspose_rotate
static int cudatranspose_rotate(AVFilterContext *ctx, AVFrame *out, AVFrame *in)
Definition: vf_transpose_cuda.c:373
ff_filter_link
static FilterLink * ff_filter_link(AVFilterLink *link)
Definition: filters.h:199
AV_PIX_FMT_0BGR32
#define AV_PIX_FMT_0BGR32
Definition: pixfmt.h:516
TransposeCUDAContext::cu_func_ushort2
CUfunction cu_func_ushort2
Definition: vf_transpose_cuda.c:82
ctx
static AVFormatContext * ctx
Definition: movenc.c:49
channels
channels
Definition: aptx.h:31
load_helper.h
AV_PIX_FMT_YUV420P
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:73
link
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a link
Definition: filter_design.txt:23
FLAGS
#define FLAGS
Definition: vf_transpose_cuda.c:518
fail
#define fail
Definition: test.h:478
AVClass
Describe the class of an AVClass context structure.
Definition: log.h:76
NULL
#define NULL
Definition: coverity.c:32
AVHWFramesContext::sw_format
enum AVPixelFormat sw_format
The pixel format identifying the actual data layout of the hardware frames.
Definition: hwcontext.h:213
av_frame_copy_props
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:599
format
New swscale design to change SwsGraph is what coordinates multiple passes These can include cascaded scaling error diffusion and so on Or we could have separate passes for the vertical and horizontal scaling In between each SwsPass lies a fully allocated image buffer Graph passes may have different levels of e g we can have a single threaded error diffusion pass following a multi threaded scaling pass SwsGraph is internally recreated whenever the image format
Definition: swscale-v2.txt:14
av_buffer_unref
void av_buffer_unref(AVBufferRef **buf)
Free a given reference and automatically free the buffer if there are no more references to it.
Definition: buffer.c:139
AVComponentDescriptor
Definition: pixdesc.h:30
av_unreachable
#define av_unreachable(msg)
Asserts that are used as compiler optimization hints depending upon ASSERT_LEVEL and NBDEBUG.
Definition: avassert.h:116
AVHWFramesContext::device_ref
AVBufferRef * device_ref
A reference to the parent AVHWDeviceContext.
Definition: hwcontext.h:129
TransposeCUDAContext::hwctx
AVCUDADeviceContext * hwctx
Definition: vf_transpose_cuda.c:70
AV_PIX_FMT_YUV422P10
#define AV_PIX_FMT_YUV422P10
Definition: pixfmt.h:540
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
DIV_UP
#define DIV_UP(a, b)
Definition: vf_transpose_cuda.c:37
init_processing_chain
static int init_processing_chain(AVFilterContext *ctx, int out_width, int out_height)
Definition: vf_transpose_cuda.c:171
TRANSPOSE_PT_TYPE_PORTRAIT
@ TRANSPOSE_PT_TYPE_PORTRAIT
Definition: transpose.h:27
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:88
height
#define height
Definition: dsp.h:89
AV_PIX_FMT_P012
#define AV_PIX_FMT_P012
Definition: pixfmt.h:603
BLOCK_Y
#define BLOCK_Y
Definition: vf_transpose_cuda.c:39
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
TransposeCUDAContext::tmp_frame
AVFrame * tmp_frame
Definition: vf_transpose_cuda.c:73
TRANSPOSE_PT_TYPE_NONE
@ TRANSPOSE_PT_TYPE_NONE
Definition: transpose.h:25
AV_PIX_FMT_NV16
@ AV_PIX_FMT_NV16
interleaved chroma YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:198
ff_vf_transpose_cuda
const FFFilter ff_vf_transpose_cuda
Definition: vf_transpose_cuda.c:557
AV_PIX_FMT_RGB32
#define AV_PIX_FMT_RGB32
Definition: pixfmt.h:511
AV_PIX_FMT_P216
#define AV_PIX_FMT_P216
Definition: pixfmt.h:620
AV_PIX_FMT_P210
#define AV_PIX_FMT_P210
Definition: pixfmt.h:616
ff_null_get_video_buffer
AVFrame * ff_null_get_video_buffer(AVFilterLink *link, int w, int h)
Definition: video.c:44
cudatranspose_options
static const AVOption cudatranspose_options[]
Definition: vf_transpose_cuda.c:520
internal.h
common.h
av_frame_move_ref
void av_frame_move_ref(AVFrame *dst, AVFrame *src)
Move everything contained in src to dst and reset src.
Definition: frame.c:523
av_frame_unref
void av_frame_unref(AVFrame *frame)
Unreference all the buffers referenced by frame and reset the frame fields.
Definition: frame.c:496
BLOCK_X
#define BLOCK_X
Definition: vf_transpose_cuda.c:38
TRANSPOSE_CLOCK
@ TRANSPOSE_CLOCK
Definition: transpose.h:32
av_inv_q
static av_always_inline AVRational av_inv_q(AVRational q)
Invert a rational.
Definition: rational.h:159
TransposeCUDAContext::cu_func_ushort
CUfunction cu_func_ushort
Definition: vf_transpose_cuda.c:80
AVFilterPad::name
const char * name
Pad name.
Definition: filters.h:46
AV_PIX_FMT_P016
#define AV_PIX_FMT_P016
Definition: pixfmt.h:604
AVHWFramesContext
This struct describes a set or pool of "hardware" frames (i.e.
Definition: hwcontext.h:118
AVCUDADeviceContext
This struct is allocated as AVHWDeviceContext.hwctx.
Definition: hwcontext_cuda.h:42
AVFILTER_DEFINE_CLASS
AVFILTER_DEFINE_CLASS(cudatranspose)
ret
ret
Definition: filter_design.txt:187
AV_PIX_FMT_NV12
@ AV_PIX_FMT_NV12
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:96
AV_PIX_FMT_0RGB32
#define AV_PIX_FMT_0RGB32
Definition: pixfmt.h:515
AVHWFramesContext::device_ctx
AVHWDeviceContext * device_ctx
The parent AVHWDeviceContext.
Definition: hwcontext.h:137
cuda_check.h
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: filters.h:264
AVFrame::sample_aspect_ratio
AVRational sample_aspect_ratio
Sample aspect ratio for the video frame, 0/1 if unknown/unspecified.
Definition: frame.h:563
OFFSET
#define OFFSET(x)
Definition: vf_transpose_cuda.c:517
AVFrame::height
int height
Definition: frame.h:538
TRANSPOSE_CCLOCK_FLIP
@ TRANSPOSE_CCLOCK_FLIP
Definition: transpose.h:31
AV_OPT_TYPE_INT
@ AV_OPT_TYPE_INT
Underlying C type is int.
Definition: opt.h:258
avfilter.h
transpose.h
TRANSPOSE_REVERSAL
@ TRANSPOSE_REVERSAL
Definition: transpose.h:35
Windows::Graphics::DirectX::Direct3D11::p
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
Definition: vsrc_gfxcapture_winrt.hpp:53
AV_PIX_FMT_YUV444P
@ AV_PIX_FMT_YUV444P
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:78
src0
const pixel *const src0
Definition: h264pred_template.c:419
AVFilterContext
An instance of a filter.
Definition: avfilter.h:273
TRANSPOSE_PT_TYPE_LANDSCAPE
@ TRANSPOSE_PT_TYPE_LANDSCAPE
Definition: transpose.h:26
TRANSPOSE_VFLIP
@ TRANSPOSE_VFLIP
Definition: transpose.h:37
AV_PIX_FMT_P010
#define AV_PIX_FMT_P010
Definition: pixfmt.h:602
AVMEDIA_TYPE_VIDEO
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:200
FFFilter::p
AVFilter p
The public AVFilter.
Definition: filters.h:271
TransposeCUDAContext::cu_func_uchar2
CUfunction cu_func_uchar2
Definition: vf_transpose_cuda.c:81
AV_PIX_FMT_YUV422P
@ AV_PIX_FMT_YUV422P
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:77
AVBufferRef
A reference to a data buffer.
Definition: buffer.h:82
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:69
w
uint8_t w
Definition: llvidencdsp.c:39
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
TransposeCUDAContext::pix_desc
const AVPixFmtDescriptor * pix_desc
Definition: vf_transpose_cuda.c:75
cudatranspose_init
static av_cold int cudatranspose_init(AVFilterContext *ctx)
Definition: vf_transpose_cuda.c:91
cudatranspose_uninit
static av_cold void cudatranspose_uninit(AVFilterContext *ctx)
Definition: vf_transpose_cuda.c:106
hwcontext.h
AVFrame::linesize
int linesize[AV_NUM_DATA_POINTERS]
For video, a positive or negative value, which is typically indicating the size in bytes of each pict...
Definition: frame.h:511
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
TransposeCUDAContext::cu_func_uchar4
CUfunction cu_func_uchar4
Definition: vf_transpose_cuda.c:83
TransposeCUDAContext::passthrough
int passthrough
PassthroughType, landscape passthrough mode enabled.
Definition: vf_transpose_cuda.c:87
h
h
Definition: vp9dsp_template.c:2070
width
#define width
Definition: dsp.h:89
av_hwframe_get_buffer
int av_hwframe_get_buffer(AVBufferRef *hwframe_ref, AVFrame *frame, int flags)
Allocate a new frame attached to the given AVHWFramesContext.
Definition: hwcontext.c:506
AV_OPT_TYPE_CONST
@ AV_OPT_TYPE_CONST
Special option type for declaring named constants.
Definition: opt.h:298
cudatranspose_transpose
static int cudatranspose_transpose(AVFilterContext *ctx, AVFrame *out, AVFrame *in)
Definition: vf_transpose_cuda.c:434
av_get_pix_fmt_name
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
Definition: pixdesc.c:3376
init_hwframe_ctx
static av_cold int init_hwframe_ctx(TransposeCUDAContext *s, AVBufferRef *device_ctx, int width, int height, enum AVPixelFormat sw_format)
Definition: vf_transpose_cuda.c:123
TransposeCUDAContext
Definition: vf_transpose_cuda.c:67