[FFmpeg-devel] [PATCH 2/6] avcodec/nvdec: avoid needless copy of output frame
Timo Rothenpieler
timo at rothenpieler.org
Tue May 8 16:31:28 EEST 2018
Replaces the data pointers with the mapped cuvid ones.
Adds buffer_refs to the frame to ensure the needed contexts stay alive
and the cuvid idx stays allocated.
Adds another buffer_ref to unmap the frame when it's unreferenced itself.
---
libavcodec/nvdec.c | 83 +++++++++++++++++++++++++++++++++-------------
1 file changed, 60 insertions(+), 23 deletions(-)
diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c
index ab3cb88b27..d98f9dd95e 100644
--- a/libavcodec/nvdec.c
+++ b/libavcodec/nvdec.c
@@ -308,7 +308,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
params.CodecType = cuvid_codec_type;
params.ChromaFormat = cuvid_chroma_format;
params.ulNumDecodeSurfaces = frames_ctx->initial_pool_size;
- params.ulNumOutputSurfaces = 1;
+ params.ulNumOutputSurfaces = frames_ctx->initial_pool_size;
ret = nvdec_decoder_create(&ctx->decoder_ref, frames_ctx->device_ref, &params, avctx);
if (ret < 0) {
@@ -354,6 +354,32 @@ static void nvdec_fdd_priv_free(void *priv)
av_freep(&priv);
}
+static void nvdec_unmap_mapped_frame(void *opaque, uint8_t *data)
+{
+ NVDECFrame *unmap_data = (NVDECFrame*)data;
+ NVDECDecoder *decoder = (NVDECDecoder*)unmap_data->decoder_ref->data;
+ CUdeviceptr devptr = (CUdeviceptr)opaque;
+ CUresult err;
+ CUcontext dummy;
+
+ err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx);
+ if (err != CUDA_SUCCESS) {
+ av_log(NULL, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
+ goto finish;
+ }
+
+ err = decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr);
+ if (err != CUDA_SUCCESS)
+ av_log(NULL, AV_LOG_ERROR, "cuvidUnmapVideoFrame failed\n");
+
+ decoder->cudl->cuCtxPopCurrent(&dummy);
+
+finish:
+ av_buffer_unref(&unmap_data->idx_ref);
+ av_buffer_unref(&unmap_data->decoder_ref);
+ av_free(unmap_data);
+}
+
static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
{
FrameDecodeData *fdd = (FrameDecodeData*)frame->private_ref->data;
@@ -361,6 +387,7 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
NVDECDecoder *decoder = (NVDECDecoder*)cf->decoder_ref->data;
CUVIDPROCPARAMS vpp = { .progressive_frame = 1 };
+ NVDECFrame *unmap_data = NULL;
CUresult err;
CUcontext dummy;
@@ -383,32 +410,39 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
goto finish;
}
- for (i = 0; frame->data[i]; i++) {
- CUDA_MEMCPY2D cpy = {
- .srcMemoryType = CU_MEMORYTYPE_DEVICE,
- .dstMemoryType = CU_MEMORYTYPE_DEVICE,
- .srcDevice = devptr,
- .dstDevice = (CUdeviceptr)frame->data[i],
- .srcPitch = pitch,
- .dstPitch = frame->linesize[i],
- .srcY = offset,
- .WidthInBytes = FFMIN(pitch, frame->linesize[i]),
- .Height = frame->height >> (i ? 1 : 0),
- };
-
- err = decoder->cudl->cuMemcpy2D(&cpy);
- if (err != CUDA_SUCCESS) {
- av_log(logctx, AV_LOG_ERROR, "Error copying decoded frame: %d\n",
- err);
- ret = AVERROR_UNKNOWN;
- goto copy_fail;
- }
+ unmap_data = av_mallocz(sizeof(*unmap_data));
+ if (!unmap_data) {
+ ret = AVERROR(ENOMEM);
+ goto copy_fail;
+ }
- offset += cpy.Height;
+ frame->buf[1] = av_buffer_create((uint8_t *)unmap_data, sizeof(*unmap_data),
+ nvdec_unmap_mapped_frame, (void*)devptr,
+ AV_BUFFER_FLAG_READONLY);
+ if (!frame->buf[1]) {
+ ret = AVERROR(ENOMEM);
+ goto copy_fail;
}
+ unmap_data->idx = cf->idx;
+ unmap_data->idx_ref = av_buffer_ref(cf->idx_ref);
+ unmap_data->decoder_ref = av_buffer_ref(cf->decoder_ref);
+
+ for (i = 0; frame->linesize[i]; i++) {
+ frame->data[i] = (uint8_t*)(devptr + offset);
+ frame->linesize[i] = pitch;
+ offset += pitch * (frame->height >> (i ? 1 : 0));
+ }
+
+ goto finish;
+
copy_fail:
- decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr);
+ if (!frame->buf[1]) {
+ decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr);
+ av_freep(&unmap_data);
+ } else {
+ av_buffer_unref(&frame->buf[1]);
+ }
finish:
decoder->cudl->cuCtxPopCurrent(&dummy);
@@ -526,6 +560,7 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
int dpb_size)
{
AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data;
+ AVCUDAFramesContext *hwctx = (AVCUDAFramesContext*)frames_ctx->hwctx;
const AVPixFmtDescriptor *sw_desc;
int cuvid_codec_type, cuvid_chroma_format;
@@ -550,6 +585,8 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
frames_ctx->height = (avctx->coded_height + 1) & ~1;
frames_ctx->initial_pool_size = dpb_size;
+ hwctx->flags = AV_CUDA_HWFRAMES_DUMMY_MODE;
+
switch (sw_desc->comp[0].depth) {
case 8:
frames_ctx->sw_format = AV_PIX_FMT_NV12;
--
2.17.0
More information about the ffmpeg-devel
mailing list