[FFmpeg-devel] [PATCH v3 2/9] avcodec: add D3D12VA hardware accelerated H264 decoding
Xiang, Haihao
haihao.xiang at intel.com
Tue Jul 11 10:35:50 EEST 2023
On Fri, 2023-06-02 at 16:06 +0800, Tong Wu wrote:
> From: Wu Jianhua <toqsxw at outlook.com>
>
> The implementation is based on:
> https://learn.microsoft.com/en-us/windows/win32/medfound/direct3d-12-video-overview
>
> With Direct3D 12 video decoding support, we can render or process the
> decoded images directly with pixel shaders or compute shaders, without
> the extra copy overhead, which is especially beneficial when rendering
> or post-processing 4K or 8K video.
>
> The command below shows how to enable d3d12va:
> ffmpeg -hwaccel d3d12va -i input.mp4 output.mp4
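>
> To keep the decoded frames in GPU memory so they can be consumed by pixel or
> compute shaders without a download, the output format can also be forced to
> the D3D12 hardware format, e.g. (assuming the hw pixel format is exposed
> under the name "d3d12"):
>
> ffmpeg -hwaccel d3d12va -hwaccel_output_format d3d12 -i input.mp4 -f null -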
>
> Signed-off-by: Wu Jianhua <toqsxw at outlook.com>
> Signed-off-by: Tong Wu <tong1.wu at intel.com>
> ---
> configure | 2 +
> libavcodec/Makefile | 3 +
> libavcodec/d3d11va.h | 3 -
> libavcodec/d3d12va.c | 552 ++++++++++++++++++++++++++++++++++++
> libavcodec/d3d12va.h | 184 ++++++++++++
> libavcodec/d3d12va_h264.c | 210 ++++++++++++++
> libavcodec/dxva2.c | 24 ++
> libavcodec/dxva2.h | 3 -
> libavcodec/dxva2_h264.c | 12 +-
> libavcodec/dxva2_internal.h | 67 +++--
> libavcodec/h264_slice.c | 4 +
> libavcodec/h264dec.c | 3 +
> libavcodec/hwaccels.h | 1 +
> libavcodec/hwconfig.h | 2 +
> 14 files changed, 1028 insertions(+), 42 deletions(-)
> create mode 100644 libavcodec/d3d12va.c
> create mode 100644 libavcodec/d3d12va.h
> create mode 100644 libavcodec/d3d12va_h264.c
>
> diff --git a/configure b/configure
> index b86064e36f..f5dad4653f 100755
> --- a/configure
> +++ b/configure
> @@ -3033,6 +3033,8 @@ h264_d3d11va_hwaccel_deps="d3d11va"
> h264_d3d11va_hwaccel_select="h264_decoder"
> h264_d3d11va2_hwaccel_deps="d3d11va"
> h264_d3d11va2_hwaccel_select="h264_decoder"
> +h264_d3d12va_hwaccel_deps="d3d12va"
> +h264_d3d12va_hwaccel_select="h264_decoder"
> h264_dxva2_hwaccel_deps="dxva2"
> h264_dxva2_hwaccel_select="h264_decoder"
> h264_nvdec_hwaccel_deps="nvdec"
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 9aacc1d477..ae143d8821 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -977,6 +977,7 @@ OBJS-$(CONFIG_ADPCM_ZORK_DECODER) += adpcm.o adpcm_data.o
>
> # hardware accelerators
> OBJS-$(CONFIG_D3D11VA) += dxva2.o
> +OBJS-$(CONFIG_D3D12VA) += dxva2.o d3d12va.o
> OBJS-$(CONFIG_DXVA2) += dxva2.o
> OBJS-$(CONFIG_NVDEC) += nvdec.o
> OBJS-$(CONFIG_VAAPI) += vaapi_decode.o
> @@ -994,6 +995,7 @@ OBJS-$(CONFIG_H263_VAAPI_HWACCEL) += vaapi_mpeg4.o
> OBJS-$(CONFIG_H263_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o
> OBJS-$(CONFIG_H264_D3D11VA_HWACCEL) += dxva2_h264.o
> OBJS-$(CONFIG_H264_DXVA2_HWACCEL) += dxva2_h264.o
> +OBJS-$(CONFIG_H264_D3D12VA_HWACCEL) += dxva2_h264.o d3d12va_h264.o
> OBJS-$(CONFIG_H264_NVDEC_HWACCEL) += nvdec_h264.o
> OBJS-$(CONFIG_H264_QSV_HWACCEL) += qsvdec.o
> OBJS-$(CONFIG_H264_VAAPI_HWACCEL) += vaapi_h264.o
> @@ -1277,6 +1279,7 @@ SKIPHEADERS += %_tablegen.h \
>
> SKIPHEADERS-$(CONFIG_AMF) += amfenc.h
> SKIPHEADERS-$(CONFIG_D3D11VA) += d3d11va.h dxva2_internal.h
> +SKIPHEADERS-$(CONFIG_D3D12VA) += d3d12va.h
> SKIPHEADERS-$(CONFIG_DXVA2) += dxva2.h dxva2_internal.h
> SKIPHEADERS-$(CONFIG_JNI) += ffjni.h
> SKIPHEADERS-$(CONFIG_LCMS2) += fflcms2.h
> diff --git a/libavcodec/d3d11va.h b/libavcodec/d3d11va.h
> index 6816b6c1e6..27f40e5519 100644
> --- a/libavcodec/d3d11va.h
> +++ b/libavcodec/d3d11va.h
> @@ -45,9 +45,6 @@
> * @{
> */
>
> -#define FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG 1 ///< Work around for Direct3D11 and old UVD/UVD+ ATI video cards
> -#define FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO    2 ///< Work around for Direct3D11 and old Intel GPUs with ClearVideo interface
> -
> /**
> * This structure is used to provides the necessary configurations and data
> * to the Direct3D11 FFmpeg HWAccel implementation.
> diff --git a/libavcodec/d3d12va.c b/libavcodec/d3d12va.c
> new file mode 100644
> index 0000000000..7f1fab7251
> --- /dev/null
> +++ b/libavcodec/d3d12va.c
> @@ -0,0 +1,552 @@
> +/*
> + * Direct3D 12 HW acceleration video decoder
> + *
> + * copyright (c) 2022-2023 Wu Jianhua <toqsxw at outlook.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include <assert.h>
> +#include <string.h>
> +#include <initguid.h>
> +
> +#include "libavutil/common.h"
> +#include "libavutil/log.h"
> +#include "libavutil/time.h"
> +#include "libavutil/imgutils.h"
> +#include "libavutil/hwcontext_d3d12va_internal.h"
> +#include "libavutil/hwcontext_d3d12va.h"
> +#include "avcodec.h"
> +#include "decode.h"
> +#include "d3d12va.h"
> +
> +typedef struct CommandAllocator {
> + ID3D12CommandAllocator *command_allocator;
> + uint64_t fence_value;
> +} CommandAllocator;
> +
> +int ff_d3d12va_get_suitable_max_bitstream_size(AVCodecContext *avctx)
> +{
> + AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx);
> +    return av_image_get_buffer_size(frames_ctx->sw_format, avctx->coded_width, avctx->coded_height, 1);
> +}
> +
> +static int d3d12va_get_valid_command_allocator(AVCodecContext *avctx, ID3D12CommandAllocator **ppAllocator)
> +{
> + HRESULT hr;
> + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
> + CommandAllocator allocator;
> +
> + if (av_fifo_peek(ctx->allocator_queue, &allocator, 1, 0) >= 0) {
> +        uint64_t completion = ID3D12Fence_GetCompletedValue(ctx->sync_ctx->fence);
> + if (completion >= allocator.fence_value) {
> + *ppAllocator = allocator.command_allocator;
> + av_fifo_read(ctx->allocator_queue, &allocator, 1);
> + return 0;
> + }
> + }
> +
> +    hr = ID3D12Device_CreateCommandAllocator(ctx->device_ctx->device, D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE,
> +        &IID_ID3D12CommandAllocator, ppAllocator);
> +    if (FAILED(hr)) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to create a new command allocator!\n");
> + return AVERROR(EINVAL);
> + }
> +
> + return 0;
> +}
> +
> +static int d3d12va_discard_command_allocator(AVCodecContext *avctx, ID3D12CommandAllocator *pAllocator, uint64_t fence_value)
> +{
> + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
> +
> + CommandAllocator allocator = {
> + .command_allocator = pAllocator,
> + .fence_value = fence_value
> + };
> +
> + if (av_fifo_write(ctx->allocator_queue, &allocator, 1) < 0) {
> + D3D12_OBJECT_RELEASE(pAllocator);
> + return AVERROR(ENOMEM);
> + }
> +
> + return 0;
> +}
> +
> +static void bufref_free_interface(void *opaque, uint8_t *data)
> +{
> + D3D12_OBJECT_RELEASE(opaque);
> +}
> +
> +static AVBufferRef *bufref_wrap_interface(IUnknown *iface)
> +{
> +    return av_buffer_create((uint8_t*)iface, 1, bufref_free_interface, iface, 0);
> +}
> +
> +static int d3d12va_create_buffer(AVCodecContext *avctx, UINT size, ID3D12Resource **ppResouce)
> +{
> + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
> +
> + D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_UPLOAD };
> +
> + D3D12_RESOURCE_DESC desc = {
> + .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
> + .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
> + .Width = size,
> + .Height = 1,
> + .DepthOrArraySize = 1,
> + .MipLevels = 1,
> + .Format = DXGI_FORMAT_UNKNOWN,
> + .SampleDesc = { .Count = 1, .Quality = 0 },
> + .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
> + .Flags = D3D12_RESOURCE_FLAG_NONE,
> + };
> +
> +    HRESULT hr = ID3D12Device_CreateCommittedResource(ctx->device_ctx->device, &heap_props, D3D12_HEAP_FLAG_NONE,
> +        &desc, D3D12_RESOURCE_STATE_GENERIC_READ, NULL, &IID_ID3D12Resource, ppResouce);
> +
> + if (FAILED(hr)) {
> + av_log(avctx, AV_LOG_ERROR, "Failed to create d3d12 buffer.\n");
> + return AVERROR(EINVAL);
> + }
> +
> + return 0;
> +}
> +
> +static int d3d12va_wait_for_gpu(AVCodecContext *avctx)
> +{
> + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
> + AVD3D12VASyncContext *sync_ctx = ctx->sync_ctx;
> +
> + return av_d3d12va_wait_queue_idle(sync_ctx, ctx->command_queue);
> +}
> +
> +static int d3d12va_create_decoder_heap(AVCodecContext *avctx)
> +{
> + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
> + AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx);
> + AVD3D12VADeviceContext *hwctx = ctx->device_ctx;
> +
> + D3D12_VIDEO_DECODER_HEAP_DESC desc = {
> + .NodeMask = 0,
> + .Configuration = ctx->cfg,
> + .DecodeWidth = frames_ctx->width,
> + .DecodeHeight = frames_ctx->height,
> +        .Format = av_d3d12va_map_sw_to_hw_format(frames_ctx->sw_format),
> + .FrameRate = { avctx->framerate.num, avctx->framerate.den },
> + .BitRate = avctx->bit_rate,
> + .MaxDecodePictureBufferCount = frames_ctx->initial_pool_size,
> + };
> +
> +    DX_CHECK(ID3D12VideoDevice_CreateVideoDecoderHeap(hwctx->video_device, &desc,
> +        &IID_ID3D12VideoDecoderHeap, &ctx->decoder_heap));
> +
> + return 0;
> +
> +fail:
> + if (ctx->decoder) {
> +        av_log(avctx, AV_LOG_ERROR, "D3D12 doesn't support decoding frames with an extent "
> +            "[width(%d), height(%d)], on your device!\n", frames_ctx->width, frames_ctx->height);
> + }
> +
> + return AVERROR(EINVAL);
> +}
> +
> +static int d3d12va_create_decoder(AVCodecContext *avctx)
> +{
> + D3D12_VIDEO_DECODER_DESC desc;
> + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
> + AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx);
> + AVD3D12VADeviceContext *hwctx = ctx->device_ctx;
> +
> + D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT feature = {
> + .NodeIndex = 0,
> + .Configuration = ctx->cfg,
> + .Width = frames_ctx->width,
> + .Height = frames_ctx->height,
> +        .DecodeFormat = av_d3d12va_map_sw_to_hw_format(frames_ctx->sw_format),
> + .FrameRate = { avctx->framerate.num, avctx->framerate.den },
> + .BitRate = avctx->bit_rate,
> + };
> +
> +    DX_CHECK(ID3D12VideoDevice_CheckFeatureSupport(hwctx->video_device, D3D12_FEATURE_VIDEO_DECODE_SUPPORT, &feature, sizeof(feature)));
> +    if (!(feature.SupportFlags & D3D12_VIDEO_DECODE_SUPPORT_FLAG_SUPPORTED) ||
> +        !(feature.DecodeTier >= D3D12_VIDEO_DECODE_TIER_2)) {
> +        av_log(avctx, AV_LOG_ERROR, "D3D12 decoding is not supported on this device\n");
> + return AVERROR(EINVAL);
> + }
> +
> + desc = (D3D12_VIDEO_DECODER_DESC) {
> + .NodeMask = 0,
> + .Configuration = ctx->cfg,
> + };
> +
> +    DX_CHECK(ID3D12VideoDevice_CreateVideoDecoder(hwctx->video_device, &desc, &IID_ID3D12VideoDecoder, &ctx->decoder));
> +
> + ctx->decoder_ref = bufref_wrap_interface((IUnknown *)ctx->decoder);
> + if (!ctx->decoder_ref)
> + return AVERROR(ENOMEM);
> +
> + return 0;
> +
> +fail:
> + return AVERROR(EINVAL);
> +}
> +
> +static inline int d3d12va_get_num_surfaces(enum AVCodecID codec_id)
> +{
> + int num_surfaces = 1;
> + switch (codec_id) {
> + case AV_CODEC_ID_H264:
> + case AV_CODEC_ID_HEVC:
> + num_surfaces += 16;
> + break;
> +
> + case AV_CODEC_ID_AV1:
> + num_surfaces += 12;
> + break;
> +
> + case AV_CODEC_ID_VP9:
> + num_surfaces += 8;
> + break;
> +
> + default:
> + num_surfaces += 2;
> + }
> +
> + return num_surfaces;
> +}
> +
> +int ff_d3d12va_common_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx)
> +{
> +    AVHWFramesContext *frames_ctx = (AVHWFramesContext *)hw_frames_ctx->data;
> + AVHWDeviceContext *device_ctx = frames_ctx->device_ctx;
> + AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx;
> +
> + frames_ctx->format = AV_PIX_FMT_D3D12;
> +    frames_ctx->sw_format = avctx->sw_pix_fmt == AV_PIX_FMT_YUV420P10 ? AV_PIX_FMT_P010 : AV_PIX_FMT_NV12;
> + frames_ctx->width = avctx->width;
> + frames_ctx->height = avctx->height;
> +
> +    frames_ctx->initial_pool_size = d3d12va_get_num_surfaces(avctx->codec_id);
> +
> + return 0;
> +}
> +
> +int ff_d3d12va_decode_init(AVCodecContext *avctx)
> +{
> + int ret;
> + UINT bitstream_size;
> + AVHWFramesContext *frames_ctx;
> + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
> +
> + ID3D12CommandAllocator *command_allocator = NULL;
> + D3D12_COMMAND_QUEUE_DESC queue_desc = {
> + .Type = D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE,
> + .Priority = 0,
> + .Flags = D3D12_COMMAND_QUEUE_FLAG_NONE,
> + .NodeMask = 0
> + };
> +
> + ctx->pix_fmt = avctx->hwaccel->pix_fmt;
> +
> + ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_D3D12VA);
> + if (ret < 0)
> + return ret;
> +
> + frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx);
> +    ctx->device_ctx = (AVD3D12VADeviceContext *)frames_ctx->device_ctx->hwctx;
> +
> + if (frames_ctx->format != ctx->pix_fmt) {
> + av_log(avctx, AV_LOG_ERROR, "Invalid pixfmt for hwaccel!\n");
> + goto fail;
> + }
> +
> + ret = d3d12va_create_decoder(avctx);
> + if (ret < 0)
> + goto fail;
> +
> + ret = d3d12va_create_decoder_heap(avctx);
> + if (ret < 0)
> + goto fail;
> +
> + ctx->max_num_ref = frames_ctx->initial_pool_size;
> +
> + bitstream_size = ff_d3d12va_get_suitable_max_bitstream_size(avctx);
> + ctx->buffers = av_calloc(sizeof(ID3D12Resource *), ctx->max_num_ref);
> + for (int i = 0; i < ctx->max_num_ref; i++) {
> + ret = d3d12va_create_buffer(avctx, bitstream_size, &ctx->buffers[i]);
> + if (ret < 0)
> + goto fail;
> + }
> +
> +    ctx->ref_resources = av_calloc(sizeof(ID3D12Resource *), ctx->max_num_ref);
> + if (!ctx->ref_resources)
> + return AVERROR(ENOMEM);
> +
> + ctx->ref_subresources = av_calloc(sizeof(UINT), ctx->max_num_ref);
> + if (!ctx->ref_subresources)
> + return AVERROR(ENOMEM);
> +
> +    ctx->allocator_queue = av_fifo_alloc2(ctx->max_num_ref, sizeof(CommandAllocator), AV_FIFO_FLAG_AUTO_GROW);
> + if (!ctx->allocator_queue)
> + return AVERROR(ENOMEM);
> +
> + ret = av_d3d12va_sync_context_alloc(ctx->device_ctx, &ctx->sync_ctx);
> + if (ret < 0)
> + goto fail;
> +
> + ret = d3d12va_get_valid_command_allocator(avctx, &command_allocator);
> + if (ret < 0)
> + goto fail;
> +
> +    DX_CHECK(ID3D12Device_CreateCommandQueue(ctx->device_ctx->device, &queue_desc,
> +        &IID_ID3D12CommandQueue, &ctx->command_queue));
> +
> +    DX_CHECK(ID3D12Device_CreateCommandList(ctx->device_ctx->device, 0, queue_desc.Type,
> +        command_allocator, NULL, &IID_ID3D12CommandList, &ctx->command_list));
> +
> + DX_CHECK(ID3D12VideoDecodeCommandList_Close(ctx->command_list));
> +
> +    ID3D12CommandQueue_ExecuteCommandLists(ctx->command_queue, 1, (ID3D12CommandList **)&ctx->command_list);
> +
> + d3d12va_wait_for_gpu(avctx);
> +
> +    d3d12va_discard_command_allocator(avctx, command_allocator, ctx->sync_ctx->fence_value);
> +
> + return 0;
> +
> +fail:
> + D3D12_OBJECT_RELEASE(command_allocator);
> + ff_d3d12va_decode_uninit(avctx);
> +
> + return AVERROR(EINVAL);
> +}
> +
> +int ff_d3d12va_decode_uninit(AVCodecContext *avctx)
> +{
> + int i, num_allocator = 0;
> + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
> + CommandAllocator allocator;
> +
> + if (ctx->sync_ctx)
> + d3d12va_wait_for_gpu(avctx);
> +
> + av_freep(&ctx->ref_resources);
> +
> + av_freep(&ctx->ref_subresources);
> +
> + for (i = 0; i < ctx->max_num_ref; i++)
> + D3D12_OBJECT_RELEASE(ctx->buffers[i]);
> +
> + av_freep(&ctx->buffers);
> +
> + D3D12_OBJECT_RELEASE(ctx->command_list);
> +
> + D3D12_OBJECT_RELEASE(ctx->command_queue);
> +
> + if (ctx->allocator_queue) {
> + while (av_fifo_read(ctx->allocator_queue, &allocator, 1) >= 0) {
> + num_allocator++;
> + D3D12_OBJECT_RELEASE(allocator.command_allocator);
> + }
> +
> +        av_log(avctx, AV_LOG_VERBOSE, "Total number of command allocators reused: %d\n", num_allocator);
> + }
> +
> + av_fifo_freep2(&ctx->allocator_queue);
> +
> + av_d3d12va_sync_context_free(&ctx->sync_ctx);
> +
> + D3D12_OBJECT_RELEASE(ctx->decoder_heap);
> +
> + av_buffer_unref(&ctx->decoder_ref);
> +
> + return 0;
> +}
> +
> +static ID3D12Resource *get_surface(const AVFrame *frame)
> +{
> + return (ID3D12Resource *)frame->data[0];
> +}
> +
> +intptr_t ff_d3d12va_get_surface_index(AVCodecContext *ctx, const AVFrame* frame)
> +{
> + return (intptr_t)frame->data[1];
> +}
> +
> +static AVD3D12VASyncContext *d3d12va_get_sync_context(const AVFrame *frame)
> +{
> + return (AVD3D12VASyncContext *)frame->data[2];
> +}
> +
> +static int d3d12va_begin_update_reference_frames(AVCodecContext *avctx, D3D12_RESOURCE_BARRIER *barriers, int index)
> +{
> + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
> + AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx);
> + AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx;
> +
> + int num_barrier = 0;
> +
> + for (int i = 0; i < ctx->max_num_ref; i++) {
> +        if (ctx->ref_resources[i] && ctx->ref_resources[i] != frames_hwctx->texture_infos[index].texture) {
> +            barriers[num_barrier].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
> +            barriers[num_barrier].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
> +            barriers[num_barrier].Transition = (D3D12_RESOURCE_TRANSITION_BARRIER){
> + .pResource = ctx->ref_resources[i],
> + .Subresource = 0,
> + .StateBefore = D3D12_RESOURCE_STATE_COMMON,
> + .StateAfter = D3D12_RESOURCE_STATE_VIDEO_DECODE_READ,
> + };
> + num_barrier++;
> + }
> + }
> +
> + return num_barrier;
> +}
> +
> +static void d3d12va_end_update_reference_frames(AVCodecContext *avctx, D3D12_RESOURCE_BARRIER *barriers, int index)
> +{
> + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
> + AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx);
> + AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx;
> + int num_barrier = 0;
> +
> + for (int i = 0; i < ctx->max_num_ref; i++) {
> +        if (ctx->ref_resources[i] && ctx->ref_resources[i] != frames_hwctx->texture_infos[index].texture) {
> +            barriers[num_barrier].Transition.pResource = ctx->ref_resources[i];
> +            barriers[num_barrier].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
> +            barriers[num_barrier].Transition.StateBefore = D3D12_RESOURCE_STATE_VIDEO_DECODE_READ;
> +            barriers[num_barrier].Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON;
> + num_barrier++;
> + }
> + }
> +}
> +
> +int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame,
> + const void *pp, unsigned pp_size,
> + const void *qm, unsigned qm_size,
> +    int(*update_input_arguments)(AVCodecContext *, D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS *, ID3D12Resource *))
> +{
> +    int ret;
> +    D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
> +    AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx);
> + AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx;
> + ID3D12CommandAllocator *command_allocator = NULL;
> +
> + ID3D12Resource *resource = get_surface(frame);
> + UINT index = ff_d3d12va_get_surface_index(avctx, frame);
> + AVD3D12VASyncContext *sync_ctx = d3d12va_get_sync_context(frame);
> +
> + ID3D12VideoDecodeCommandList *cmd_list = ctx->command_list;
> + D3D12_RESOURCE_BARRIER barriers[D3D12VA_MAX_SURFACES] = { 0 };
> +
> + D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS input_args = {
> + .NumFrameArguments = 2,
> + .FrameArguments = {
> + [0] = {
> + .Type = D3D12_VIDEO_DECODE_ARGUMENT_TYPE_PICTURE_PARAMETERS,
> + .Size = pp_size,
> + .pData = (void *)pp,
> + },
> + [1] = {
> +                .Type = D3D12_VIDEO_DECODE_ARGUMENT_TYPE_INVERSE_QUANTIZATION_MATRIX,
> + .Size = qm_size,
> + .pData = (void *)qm,
> + },
> + },
> + .pHeap = ctx->decoder_heap,
> + };
> +
> + D3D12_VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS output_args = {
> + .ConversionArguments = 0,
> + .OutputSubresource = 0,
> + .pOutputTexture2D = resource,
> + };
> +
> + UINT num_barrier = 1;
> + barriers[0] = (D3D12_RESOURCE_BARRIER) {
> + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
> + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
> + .Transition = {
> + .pResource = resource,
> + .Subresource = 0,
> + .StateBefore = D3D12_RESOURCE_STATE_COMMON,
> + .StateAfter = D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,
> + },
> + };
> +
> +    memset(ctx->ref_resources, 0, sizeof(ID3D12Resource *) * ctx->max_num_ref);
> + memset(ctx->ref_subresources, 0, sizeof(UINT) * ctx->max_num_ref);
> + input_args.ReferenceFrames.NumTexture2Ds = ctx->max_num_ref;
> + input_args.ReferenceFrames.ppTexture2Ds = ctx->ref_resources;
> + input_args.ReferenceFrames.pSubresources = ctx->ref_subresources;
> +
> + av_d3d12va_wait_idle(sync_ctx);
> +
> + if (!qm)
> + input_args.NumFrameArguments = 1;
> +
> + ret = update_input_arguments(avctx, &input_args, ctx->buffers[index]);
> + if (ret < 0)
> + return ret;
> +
> + ret = d3d12va_get_valid_command_allocator(avctx, &command_allocator);
> + if (ret < 0)
> + goto fail;
> +
> + DX_CHECK(ID3D12CommandAllocator_Reset(command_allocator));
> +
> +    DX_CHECK(ID3D12VideoDecodeCommandList_Reset(cmd_list, command_allocator));
> +
> +    num_barrier += d3d12va_begin_update_reference_frames(avctx, &barriers[1], index);
> +
> +    ID3D12VideoDecodeCommandList_ResourceBarrier(cmd_list, num_barrier, barriers);
> +
> +    ID3D12VideoDecodeCommandList_DecodeFrame(cmd_list, ctx->decoder, &output_args, &input_args);
> +
> + barriers[0].Transition.StateBefore = barriers[0].Transition.StateAfter;
> + barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON;
> + d3d12va_end_update_reference_frames(avctx, &barriers[1], index);
> +
> +    ID3D12VideoDecodeCommandList_ResourceBarrier(cmd_list, num_barrier, barriers);
> +
> + DX_CHECK(ID3D12VideoDecodeCommandList_Close(cmd_list));
> +
> +    ID3D12CommandQueue_ExecuteCommandLists(ctx->command_queue, 1, (ID3D12CommandList **)&ctx->command_list);
> +
> +    DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, sync_ctx->fence, ++sync_ctx->fence_value));
> +
> +    DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, ctx->sync_ctx->fence, ++ctx->sync_ctx->fence_value));
> +
> +    ret = d3d12va_discard_command_allocator(avctx, command_allocator, ctx->sync_ctx->fence_value);
> + if (ret < 0)
> + return ret;
> +
> + if (ctx->device_ctx->sync) {
> + ret = av_d3d12va_wait_idle(ctx->sync_ctx);
> + if (ret < 0)
> + return ret;
> + }
> +
> + return 0;
> +
> +fail:
> + if (command_allocator)
> +        d3d12va_discard_command_allocator(avctx, command_allocator, ctx->sync_ctx->fence_value);
> + return AVERROR(EINVAL);
> +}
> diff --git a/libavcodec/d3d12va.h b/libavcodec/d3d12va.h
> new file mode 100644
> index 0000000000..da3e7b7ab9
> --- /dev/null
> +++ b/libavcodec/d3d12va.h
> @@ -0,0 +1,184 @@
> +/*
> + * Direct3D 12 HW acceleration video decoder
> + *
> + * copyright (c) 2022-2023 Wu Jianhua <toqsxw at outlook.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVCODEC_D3D12VA_H
> +#define AVCODEC_D3D12VA_H
> +
> +#include "libavutil/fifo.h"
> +#include "libavutil/hwcontext.h"
> +#include "libavutil/hwcontext_d3d12va.h"
> +#include "avcodec.h"
> +#include "internal.h"
> +
> +/**
> + * @brief This structure is used to provide the necessary configurations and data
> + * to the FFmpeg Direct3D 12 HWAccel implementation for the video decoder.
> + *
> + * The application must make it available as AVCodecContext.hwaccel_context.
> + */
Does d3d12va support AVCodecContext.hwaccel_context? If it does, you should make
this header public, adjust the structures and remove all ff_ functions, otherwise
users can't use these structures in an application. If it doesn't, please remove
the comment above; it would also be better to rename this file to d3d12va_decode.h
because the structures and functions in this file are only for d3d12va decoders.
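
For reference, a public hwaccel_context means the application allocates and fills
the context itself before avcodec_open2(), the way d3d11va allows today. A rough
sketch of that pattern, assuming decoder, video_context, decoder_config, surfaces
and nb_surfaces are objects the application has already created on its own:

    AVD3D11VAContext *hwctx = av_d3d11va_alloc_context();
    if (!hwctx)
        return AVERROR(ENOMEM);
    hwctx->decoder         = decoder;        /* ID3D11VideoDecoder created by the app */
    hwctx->video_context   = video_context;  /* ID3D11VideoContext */
    hwctx->cfg             = &decoder_config;
    hwctx->surface         = surfaces;       /* array of decoder output views */
    hwctx->surface_count   = nb_surfaces;
    avctx->hwaccel_context = hwctx;          /* must be set before avcodec_open2() */

If D3D12VADecodeContext is instead meant to stay internal and be driven only
through AVHWDeviceContext/AVHWFramesContext, nothing like the above applies and
the header should not mention hwaccel_context at all.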
Thanks
Haihao
> +typedef struct D3D12VADecodeContext {
> + AVBufferRef *decoder_ref;
> +
> + /**
> + * D3D12 video decoder
> + */
> + ID3D12VideoDecoder *decoder;
> +
> + /**
> + * D3D12 video decoder heap
> + */
> + ID3D12VideoDecoderHeap *decoder_heap;
> +
> + /**
> + * D3D12 configuration used to create the decoder
> + *
> + * Specified by decoders
> + */
> + D3D12_VIDEO_DECODE_CONFIGURATION cfg;
> +
> + /**
> + * A cached queue for reusing the D3D12 command allocators
> + *
> +     * @see https://learn.microsoft.com/en-us/windows/win32/direct3d12/recording-command-lists-and-bundles#id3d12commandallocator
> + */
> + AVFifo *allocator_queue;
> +
> + /**
> + * D3D12 command queue
> + */
> + ID3D12CommandQueue *command_queue;
> +
> + /**
> + * D3D12 video decode command list
> + */
> + ID3D12VideoDecodeCommandList *command_list;
> +
> + /**
> + * The array of buffer resources used to upload compressed bitstream
> + *
> + * The buffers.length is the same as D3D12VADecodeContext.max_num_ref
> + */
> + ID3D12Resource **buffers;
> +
> + /**
> + * The array of resources used for reference frames
> + *
> +     * The ref_resources.length is the same as D3D12VADecodeContext.max_num_ref
> + */
> + ID3D12Resource **ref_resources;
> +
> + /**
> + * The array of subresources used for reference frames
> + *
> +     * The ref_subresources.length is the same as D3D12VADecodeContext.max_num_ref
> + */
> + UINT *ref_subresources;
> +
> + /**
> + * Maximum number of reference frames
> + */
> + UINT max_num_ref;
> +
> + /**
> + * The sync context used to sync command queue
> + */
> + AVD3D12VASyncContext *sync_ctx;
> +
> + /**
> + * A pointer to AVD3D12VADeviceContext used to create D3D12 objects
> + */
> + AVD3D12VADeviceContext *device_ctx;
> +
> + /**
> + * Pixel format
> + */
> + enum AVPixelFormat pix_fmt;
> +
> + /**
> + * Private to the FFmpeg AVHWAccel implementation
> + */
> + unsigned report_id;
> +} D3D12VADecodeContext;
> +
> +/**
> + * @}
> + */
> +
> +#define D3D12VA_DECODE_CONTEXT(avctx) ((D3D12VADecodeContext *)((avctx)->internal->hwaccel_priv_data))
> +#define D3D12VA_FRAMES_CONTEXT(avctx) ((AVHWFramesContext *)(avctx)->hw_frames_ctx->data)
> +
> +/**
> + * @brief Get a suitable maximum bitstream size
> + *
> + * Creating and destroying a resource on d3d12 needs sync and reallocation, so use this function
> + * to help allocate a big enough bitstream buffer to avoid recreating resources when decoding.
> + *
> + * @return the suitable size
> + */
> +int ff_d3d12va_get_suitable_max_bitstream_size(AVCodecContext *avctx);
> +
> +/**
> + * @brief init D3D12VADecodeContext
> + *
> + * @return Error code (ret < 0 if failed)
> + */
> +int ff_d3d12va_decode_init(AVCodecContext *avctx);
> +
> +/**
> + * @brief uninit D3D12VADecodeContext
> + *
> + * @return Error code (ret < 0 if failed)
> + */
> +int ff_d3d12va_decode_uninit(AVCodecContext *avctx);
> +
> +/**
> + * @brief d3d12va common frame params
> + *
> + * @return Error code (ret < 0 if failed)
> + */
> +int ff_d3d12va_common_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx);
> +
> +/**
> + * @brief d3d12va common end frame
> + *
> + * @param avctx codec context
> + * @param frame current output frame
> + * @param pp picture parameters
> + * @param pp_size the size of the picture parameters
> + * @param qm quantization matrix
> + * @param qm_size the size of the quantization matrix
> + * @param callback update decoder-specified input stream arguments
> + * @return Error code (ret < 0 if failed)
> + */
> +int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame,
> + const void *pp, unsigned pp_size,
> + const void *qm, unsigned qm_size,
> +    int(*)(AVCodecContext *, D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS *, ID3D12Resource *));
> +
> +/**
> + * @brief get surface index
> + *
> + * @return index
> + */
> +intptr_t ff_d3d12va_get_surface_index(AVCodecContext *avctx, const AVFrame *frame);
> +
> +#endif /* AVCODEC_D3D12VA_H */
> diff --git a/libavcodec/d3d12va_h264.c b/libavcodec/d3d12va_h264.c
> new file mode 100644
> index 0000000000..0810a034b4
> --- /dev/null
> +++ b/libavcodec/d3d12va_h264.c
> @@ -0,0 +1,210 @@
> +/*
> + * Direct3D 12 h264 HW acceleration
> + *
> + * copyright (c) 2022-2023 Wu Jianhua <toqsxw at outlook.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "config_components.h"
> +#include "libavutil/avassert.h"
> +#include "h264dec.h"
> +#include "h264data.h"
> +#include "h264_ps.h"
> +#include "mpegutils.h"
> +#include "dxva2_internal.h"
> +#include "d3d12va.h"
> +#include "libavutil/hwcontext_d3d12va_internal.h"
> +#include <dxva.h>
> +
> +typedef struct H264DecodePictureContext {
> + DXVA_PicParams_H264 pp;
> + DXVA_Qmatrix_H264 qm;
> + unsigned slice_count;
> + DXVA_Slice_H264_Short slice_short[MAX_SLICES];
> + const uint8_t *bitstream;
> + unsigned bitstream_size;
> +} H264DecodePictureContext;
> +
> +static void fill_slice_short(DXVA_Slice_H264_Short *slice,
> + unsigned position, unsigned size)
> +{
> + memset(slice, 0, sizeof(*slice));
> + slice->BSNALunitDataLocation = position;
> + slice->SliceBytesInBuffer = size;
> + slice->wBadSliceChopping = 0;
> +}
> +
> +static int d3d12va_h264_start_frame(AVCodecContext *avctx,
> + av_unused const uint8_t *buffer,
> + av_unused uint32_t size)
> +{
> + const H264Context *h = avctx->priv_data;
> +    H264DecodePictureContext *ctx_pic = h->cur_pic_ptr->hwaccel_picture_private;
> + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
> +
> + if (!ctx)
> + return -1;
> +
> + assert(ctx_pic);
> +
> +    ff_dxva2_h264_fill_picture_parameters(avctx, (AVDXVAContext *)ctx, &ctx_pic->pp);
> +
> +    ff_dxva2_h264_fill_scaling_lists(avctx, (AVDXVAContext *)ctx, &ctx_pic->qm);
> +
> + ctx_pic->slice_count = 0;
> + ctx_pic->bitstream_size = 0;
> + ctx_pic->bitstream = NULL;
> +
> + return 0;
> +}
> +
> +static int d3d12va_h264_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
> +{
> + unsigned position;
> + const H264Context *h = avctx->priv_data;
> + const H264SliceContext *sl = &h->slice_ctx[0];
> + const H264Picture *current_picture = h->cur_pic_ptr;
> +    H264DecodePictureContext *ctx_pic = current_picture->hwaccel_picture_private;
> +
> + if (ctx_pic->slice_count >= MAX_SLICES)
> + return AVERROR(ERANGE);
> +
> + if (!ctx_pic->bitstream)
> + ctx_pic->bitstream = buffer;
> + ctx_pic->bitstream_size += size;
> +
> + position = buffer - ctx_pic->bitstream;
> +    fill_slice_short(&ctx_pic->slice_short[ctx_pic->slice_count], position, size);
> + ctx_pic->slice_count++;
> +
> +    if (sl->slice_type != AV_PICTURE_TYPE_I && sl->slice_type != AV_PICTURE_TYPE_SI)
> + ctx_pic->pp.wBitFields &= ~(1 << 15); /* Set IntraPicFlag to 0 */
> +
> + return 0;
> +}
> +
> +#define START_CODE 65536
> +#define START_CODE_SIZE 3
> +static int update_input_arguments(AVCodecContext *avctx, D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS *input_args, ID3D12Resource *buffer)
> +{
> + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
> + AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx);
> + AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx;
> +
> + const H264Context *h = avctx->priv_data;
> + const H264Picture *current_picture = h->cur_pic_ptr;
> +    H264DecodePictureContext *ctx_pic = current_picture->hwaccel_picture_private;
> +
> + int i, index;
> + uint8_t *mapped_data, *mapped_ptr;
> + DXVA_Slice_H264_Short *slice;
> + D3D12_VIDEO_DECODE_FRAME_ARGUMENT *args;
> +
> + if (FAILED(ID3D12Resource_Map(buffer, 0, NULL, &mapped_data))) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to map D3D12 Buffer resource!\n");
> + return AVERROR(EINVAL);
> + }
> +
> + mapped_ptr = mapped_data;
> + for (i = 0; i < ctx_pic->slice_count; i++) {
> + UINT position, size;
> + slice = &ctx_pic->slice_short[i];
> +
> + position = slice->BSNALunitDataLocation;
> + size = slice->SliceBytesInBuffer;
> +
> + slice->SliceBytesInBuffer += START_CODE_SIZE;
> + slice->BSNALunitDataLocation = mapped_ptr - mapped_data;
> +
> + *(uint32_t *)mapped_ptr = START_CODE;
> + mapped_ptr += START_CODE_SIZE;
> +
> + memcpy(mapped_ptr, &ctx_pic->bitstream[position], size);
> + mapped_ptr += size;
> + }
> +
> + ID3D12Resource_Unmap(buffer, 0, NULL);
> +
> +    input_args->CompressedBitstream = (D3D12_VIDEO_DECODE_COMPRESSED_BITSTREAM){
> + .pBuffer = buffer,
> + .Offset = 0,
> + .Size = mapped_ptr - mapped_data,
> + };
> +
> + args = &input_args->FrameArguments[input_args->NumFrameArguments++];
> + args->Type = D3D12_VIDEO_DECODE_ARGUMENT_TYPE_SLICE_CONTROL;
> + args->Size = sizeof(DXVA_Slice_H264_Short) * ctx_pic->slice_count;
> + args->pData = ctx_pic->slice_short;
> +
> + index = ctx_pic->pp.CurrPic.Index7Bits;
> + ctx->ref_resources[index] = frames_hwctx->texture_infos[index].texture;
> + for (i = 0; i < FF_ARRAY_ELEMS(ctx_pic->pp.RefFrameList); i++) {
> + index = ctx_pic->pp.RefFrameList[i].Index7Bits;
> + if (index != 0x7f)
> +            ctx->ref_resources[index] = frames_hwctx->texture_infos[index].texture;
> + }
> +
> + return 0;
> +}
> +
> +static int d3d12va_h264_end_frame(AVCodecContext *avctx)
> +{
> + H264Context *h = avctx->priv_data;
> +    H264DecodePictureContext *ctx_pic = h->cur_pic_ptr->hwaccel_picture_private;
> + H264SliceContext *sl = &h->slice_ctx[0];
> +
> + int ret;
> +
> + if (ctx_pic->slice_count <= 0 || ctx_pic->bitstream_size <= 0)
> + return -1;
> +
> + ret = ff_d3d12va_common_end_frame(avctx, h->cur_pic_ptr->f,
> + &ctx_pic->pp, sizeof(ctx_pic->pp),
> + &ctx_pic->qm, sizeof(ctx_pic->qm),
> + update_input_arguments);
> + if (!ret)
> + ff_h264_draw_horiz_band(h, sl, 0, h->avctx->height);
> +
> + return ret;
> +}
> +
> +static int d3d12va_h264_decode_init(AVCodecContext *avctx)
> +{
> + D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
> +
> + ctx->cfg.DecodeProfile = D3D12_VIDEO_DECODE_PROFILE_H264;
> +
> + return ff_d3d12va_decode_init(avctx);
> +}
> +
> +#if CONFIG_H264_D3D12VA_HWACCEL
> +const AVHWAccel ff_h264_d3d12va_hwaccel = {
> + .name = "h264_d3d12va",
> + .type = AVMEDIA_TYPE_VIDEO,
> + .id = AV_CODEC_ID_H264,
> + .pix_fmt = AV_PIX_FMT_D3D12,
> + .init = d3d12va_h264_decode_init,
> + .uninit = ff_d3d12va_decode_uninit,
> + .start_frame = d3d12va_h264_start_frame,
> + .decode_slice = d3d12va_h264_decode_slice,
> + .end_frame = d3d12va_h264_end_frame,
> + .frame_params = ff_d3d12va_common_frame_params,
> + .frame_priv_data_size = sizeof(H264DecodePictureContext),
> + .priv_data_size = sizeof(D3D12VADecodeContext),
> +};
> +#endif
> diff --git a/libavcodec/dxva2.c b/libavcodec/dxva2.c
> index 568d686f39..b22ea3e8f2 100644
> --- a/libavcodec/dxva2.c
> +++ b/libavcodec/dxva2.c
> @@ -774,6 +774,10 @@ unsigned ff_dxva2_get_surface_index(const AVCodecContext *avctx,
> void *surface = get_surface(avctx, frame);
> unsigned i;
>
> +#if CONFIG_D3D12VA
> + if (avctx->pix_fmt == AV_PIX_FMT_D3D12)
> + return (intptr_t)frame->data[1];
> +#endif
> #if CONFIG_D3D11VA
> if (avctx->pix_fmt == AV_PIX_FMT_D3D11)
> return (intptr_t)frame->data[1];
> @@ -1056,3 +1060,23 @@ int ff_dxva2_is_d3d11(const AVCodecContext *avctx)
> else
> return 0;
> }
> +
> +unsigned *ff_dxva2_get_report_id(const AVCodecContext *avctx, AVDXVAContext *ctx)
> +{
> + unsigned *report_id = NULL;
> +
> +#if CONFIG_D3D12VA
> + if (avctx->pix_fmt == AV_PIX_FMT_D3D12)
> + report_id = &ctx->d3d12va.report_id;
> +#endif
> +#if CONFIG_D3D11VA
> + if (ff_dxva2_is_d3d11(avctx))
> + report_id = &ctx->d3d11va.report_id;
> +#endif
> +#if CONFIG_DXVA2
> + if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD)
> + report_id = &ctx->dxva2.report_id;
> +#endif
> +
> + return report_id;
> +}
> diff --git a/libavcodec/dxva2.h b/libavcodec/dxva2.h
> index 22c93992f2..bdec6112e9 100644
> --- a/libavcodec/dxva2.h
> +++ b/libavcodec/dxva2.h
> @@ -45,9 +45,6 @@
> * @{
> */
>
> -#define FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG 1 ///< Work around for DXVA2 and old UVD/UVD+ ATI video cards
> -#define FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO    2 ///< Work around for DXVA2 and old Intel GPUs with ClearVideo interface
> -
> /**
> * This structure is used to provides the necessary configurations and data
> * to the DXVA2 FFmpeg HWAccel implementation.
> diff --git a/libavcodec/dxva2_h264.c b/libavcodec/dxva2_h264.c
> index 6300b1418d..7a076ea981 100644
> --- a/libavcodec/dxva2_h264.c
> +++ b/libavcodec/dxva2_h264.c
> @@ -47,9 +47,10 @@ static void fill_picture_entry(DXVA_PicEntry_H264 *pic,
> pic->bPicEntry = index | (flag << 7);
> }
>
> -static void fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *ctx, const H264Context *h,
> +void ff_dxva2_h264_fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *ctx,
>                                     DXVA_PicParams_H264 *pp)
> {
> + const H264Context *h = avctx->priv_data;
> const H264Picture *current_picture = h->cur_pic_ptr;
> const SPS *sps = h->ps.sps;
> const PPS *pps = h->ps.pps;
> @@ -163,9 +164,10 @@ static void fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *
>     //pp->SliceGroupMap[810]; /* XXX not implemented by FFmpeg */
> }
>
> -static void fill_scaling_lists(const AVCodecContext *avctx, AVDXVAContext *ctx, const H264Context *h, DXVA_Qmatrix_H264 *qm)
> +void ff_dxva2_h264_fill_scaling_lists(const AVCodecContext *avctx, AVDXVAContext *ctx, DXVA_Qmatrix_H264 *qm)
> {
> - const PPS *pps = h->ps.pps;
> + const H264Context *h = avctx->priv_data;
> + const PPS *pps = h->ps.pps;
> unsigned i, j;
> memset(qm, 0, sizeof(*qm));
>     if (DXVA_CONTEXT_WORKAROUND(avctx, ctx) & FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG) {
> @@ -453,10 +455,10 @@ static int dxva2_h264_start_frame(AVCodecContext *avctx,
> assert(ctx_pic);
>
> /* Fill up DXVA_PicParams_H264 */
> - fill_picture_parameters(avctx, ctx, h, &ctx_pic->pp);
> + ff_dxva2_h264_fill_picture_parameters(avctx, ctx, &ctx_pic->pp);
>
> /* Fill up DXVA_Qmatrix_H264 */
> - fill_scaling_lists(avctx, ctx, h, &ctx_pic->qm);
> + ff_dxva2_h264_fill_scaling_lists(avctx, ctx, &ctx_pic->qm);
>
> ctx_pic->slice_count = 0;
> ctx_pic->bitstream_size = 0;
> diff --git a/libavcodec/dxva2_internal.h b/libavcodec/dxva2_internal.h
> index b822af59cd..a9a1fc090e 100644
> --- a/libavcodec/dxva2_internal.h
> +++ b/libavcodec/dxva2_internal.h
> @@ -26,18 +26,34 @@
> #define COBJMACROS
>
> #include "config.h"
> +#include "config_components.h"
>
> /* define the proper COM entries before forcing desktop APIs */
> #include <objbase.h>
>
> +#define FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG 1 ///< Work around for DXVA2/Direct3D11 and old UVD/UVD+ ATI video cards
> +#define FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO    2 ///< Work around for DXVA2/Direct3D11 and old Intel GPUs with ClearVideo interface
> +
> #if CONFIG_DXVA2
> #include "dxva2.h"
> #include "libavutil/hwcontext_dxva2.h"
> +#define DXVA2_VAR(ctx, var) ctx->dxva2.var
> +#else
> +#define DXVA2_VAR(ctx, var) 0
> #endif
> +
> #if CONFIG_D3D11VA
> #include "d3d11va.h"
> #include "libavutil/hwcontext_d3d11va.h"
> +#define D3D11VA_VAR(ctx, var) ctx->d3d11va.var
> +#else
> +#define D3D11VA_VAR(ctx, var) 0
> +#endif
> +
> +#if CONFIG_D3D12VA
> +#include "d3d12va.h"
> #endif
> +
> #if HAVE_DXVA_H
> /* When targeting WINAPI_FAMILY_PHONE_APP or WINAPI_FAMILY_APP, dxva.h
> * defines nothing. Force the struct definitions to be visible. */
> @@ -62,6 +78,9 @@ typedef union {
> #if CONFIG_DXVA2
> struct dxva_context dxva2;
> #endif
> +#if CONFIG_D3D12VA
> + struct D3D12VADecodeContext d3d12va;
> +#endif
> } AVDXVAContext;
>
> typedef struct FFDXVASharedContext {
> @@ -101,39 +120,19 @@ typedef struct FFDXVASharedContext {
> #define D3D11VA_CONTEXT(ctx) (&ctx->d3d11va)
> #define DXVA2_CONTEXT(ctx) (&ctx->dxva2)
>
> -#if CONFIG_D3D11VA && CONFIG_DXVA2
> -#define DXVA_CONTEXT_WORKAROUND(avctx, ctx)     (ff_dxva2_is_d3d11(avctx) ? ctx->d3d11va.workaround : ctx->dxva2.workaround)
> -#define DXVA_CONTEXT_COUNT(avctx, ctx)          (ff_dxva2_is_d3d11(avctx) ? ctx->d3d11va.surface_count : ctx->dxva2.surface_count)
> -#define DXVA_CONTEXT_DECODER(avctx, ctx)        (ff_dxva2_is_d3d11(avctx) ? (void *)ctx->d3d11va.decoder : (void *)ctx->dxva2.decoder)
> -#define DXVA_CONTEXT_REPORT_ID(avctx, ctx)      (*(ff_dxva2_is_d3d11(avctx) ? &ctx->d3d11va.report_id : &ctx->dxva2.report_id))
> -#define DXVA_CONTEXT_CFG(avctx, ctx)            (ff_dxva2_is_d3d11(avctx) ? (void *)ctx->d3d11va.cfg : (void *)ctx->dxva2.cfg)
> -#define DXVA_CONTEXT_CFG_BITSTREAM(avctx, ctx)  (ff_dxva2_is_d3d11(avctx) ? ctx->d3d11va.cfg->ConfigBitstreamRaw : ctx->dxva2.cfg->ConfigBitstreamRaw)
> -#define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? ctx->d3d11va.cfg->ConfigIntraResidUnsigned : ctx->dxva2.cfg->ConfigIntraResidUnsigned)
> -#define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx) (ff_dxva2_is_d3d11(avctx) ? ctx->d3d11va.cfg->ConfigResidDiffAccelerator : ctx->dxva2.cfg->ConfigResidDiffAccelerator)
> +#define DXVA2_CONTEXT_VAR(avctx, ctx, var) (avctx->pix_fmt == AV_PIX_FMT_D3D12 ? 0 : (ff_dxva2_is_d3d11(avctx) ? D3D11VA_VAR(ctx, var) : DXVA2_VAR(ctx, var)))
> +
> +#define DXVA_CONTEXT_REPORT_ID(avctx, ctx)      (*ff_dxva2_get_report_id(avctx, ctx))
> +#define DXVA_CONTEXT_WORKAROUND(avctx, ctx)     DXVA2_CONTEXT_VAR(avctx, ctx, workaround)
> +#define DXVA_CONTEXT_COUNT(avctx, ctx)          DXVA2_CONTEXT_VAR(avctx, ctx, surface_count)
> +#define DXVA_CONTEXT_DECODER(avctx, ctx)        (avctx->pix_fmt == AV_PIX_FMT_D3D12 ? 0 : (ff_dxva2_is_d3d11(avctx) ? (void *)D3D11VA_VAR(ctx, decoder) : (void *)DXVA2_VAR(ctx, decoder)))
> +#define DXVA_CONTEXT_CFG(avctx, ctx)            (avctx->pix_fmt == AV_PIX_FMT_D3D12 ? 0 : (ff_dxva2_is_d3d11(avctx) ? (void *)D3D11VA_VAR(ctx, cfg) : (void *)DXVA2_VAR(ctx, cfg)))
> +#define DXVA_CONTEXT_CFG_BITSTREAM(avctx, ctx)  DXVA2_CONTEXT_VAR(avctx, ctx, cfg->ConfigBitstreamRaw)
> +#define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx) DXVA2_CONTEXT_VAR(avctx, ctx, cfg->ConfigIntraResidUnsigned)
> +#define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx) DXVA2_CONTEXT_VAR(avctx, ctx, cfg->ConfigResidDiffAccelerator)
> #define DXVA_CONTEXT_VALID(avctx, ctx)          (DXVA_CONTEXT_DECODER(avctx, ctx) && \
>                                                  DXVA_CONTEXT_CFG(avctx, ctx) && \
> -                                                 (ff_dxva2_is_d3d11(avctx) || ctx->dxva2.surface_count))
> -#elif CONFIG_DXVA2
> -#define DXVA_CONTEXT_WORKAROUND(avctx, ctx)     (ctx->dxva2.workaround)
> -#define DXVA_CONTEXT_COUNT(avctx, ctx)          (ctx->dxva2.surface_count)
> -#define DXVA_CONTEXT_DECODER(avctx, ctx)        (ctx->dxva2.decoder)
> -#define DXVA_CONTEXT_REPORT_ID(avctx, ctx)      (*(&ctx->dxva2.report_id))
> -#define DXVA_CONTEXT_CFG(avctx, ctx)            (ctx->dxva2.cfg)
> -#define DXVA_CONTEXT_CFG_BITSTREAM(avctx, ctx)  (ctx->dxva2.cfg->ConfigBitstreamRaw)
> -#define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx) (ctx->dxva2.cfg->ConfigIntraResidUnsigned)
> -#define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx) (ctx->dxva2.cfg->ConfigResidDiffAccelerator)
> -#define DXVA_CONTEXT_VALID(avctx, ctx)          (ctx->dxva2.decoder && ctx->dxva2.cfg && ctx->dxva2.surface_count)
> -#elif CONFIG_D3D11VA
> -#define DXVA_CONTEXT_WORKAROUND(avctx, ctx)     (ctx->d3d11va.workaround)
> -#define DXVA_CONTEXT_COUNT(avctx, ctx)          (ctx->d3d11va.surface_count)
> -#define DXVA_CONTEXT_DECODER(avctx, ctx)        (ctx->d3d11va.decoder)
> -#define DXVA_CONTEXT_REPORT_ID(avctx, ctx)      (*(&ctx->d3d11va.report_id))
> -#define DXVA_CONTEXT_CFG(avctx, ctx)            (ctx->d3d11va.cfg)
> -#define DXVA_CONTEXT_CFG_BITSTREAM(avctx, ctx)  (ctx->d3d11va.cfg->ConfigBitstreamRaw)
> -#define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx) (ctx->d3d11va.cfg->ConfigIntraResidUnsigned)
> -#define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx) (ctx->d3d11va.cfg->ConfigResidDiffAccelerator)
> -#define DXVA_CONTEXT_VALID(avctx, ctx)          (ctx->d3d11va.decoder && ctx->d3d11va.cfg)
> -#endif
> +                                                 (ff_dxva2_is_d3d11(avctx) || DXVA2_VAR(ctx, surface_count)))
>
> unsigned ff_dxva2_get_surface_index(const AVCodecContext *avctx,
> const AVDXVAContext *,
> @@ -161,4 +160,10 @@ int ff_dxva2_common_frame_params(AVCodecContext *avctx,
>
> int ff_dxva2_is_d3d11(const AVCodecContext *avctx);
>
> +unsigned *ff_dxva2_get_report_id(const AVCodecContext *avctx, AVDXVAContext *ctx);
> +
> +void ff_dxva2_h264_fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *ctx, DXVA_PicParams_H264 *pp);
> +
> +void ff_dxva2_h264_fill_scaling_lists(const AVCodecContext *avctx, AVDXVAContext *ctx, DXVA_Qmatrix_H264 *qm);
> +
> #endif /* AVCODEC_DXVA2_INTERNAL_H */
> diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
> index 41bf30eefc..df70ad8a2f 100644
> --- a/libavcodec/h264_slice.c
> +++ b/libavcodec/h264_slice.c
> @@ -778,6 +778,7 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback)
> {
> #define HWACCEL_MAX (CONFIG_H264_DXVA2_HWACCEL + \
> (CONFIG_H264_D3D11VA_HWACCEL * 2) + \
> + CONFIG_H264_D3D12VA_HWACCEL + \
> CONFIG_H264_NVDEC_HWACCEL + \
> CONFIG_H264_VAAPI_HWACCEL + \
> CONFIG_H264_VIDEOTOOLBOX_HWACCEL + \
> @@ -883,6 +884,9 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback)
> *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
> *fmt++ = AV_PIX_FMT_D3D11;
> #endif
> +#if CONFIG_H264_D3D12VA_HWACCEL
> + *fmt++ = AV_PIX_FMT_D3D12;
> +#endif
> #if CONFIG_H264_VAAPI_HWACCEL
> *fmt++ = AV_PIX_FMT_VAAPI;
> #endif
> diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c
> index 19f8dba131..853d3262f7 100644
> --- a/libavcodec/h264dec.c
> +++ b/libavcodec/h264dec.c
> @@ -1089,6 +1089,9 @@ const FFCodec ff_h264_decoder = {
> #if CONFIG_H264_D3D11VA2_HWACCEL
> HWACCEL_D3D11VA2(h264),
> #endif
> +#if CONFIG_H264_D3D12VA_HWACCEL
> + HWACCEL_D3D12VA(h264),
> +#endif
> #if CONFIG_H264_NVDEC_HWACCEL
> HWACCEL_NVDEC(h264),
> #endif
> diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
> index 48dfc17f72..be54604b81 100644
> --- a/libavcodec/hwaccels.h
> +++ b/libavcodec/hwaccels.h
> @@ -32,6 +32,7 @@ extern const AVHWAccel ff_h263_vaapi_hwaccel;
> extern const AVHWAccel ff_h263_videotoolbox_hwaccel;
> extern const AVHWAccel ff_h264_d3d11va_hwaccel;
> extern const AVHWAccel ff_h264_d3d11va2_hwaccel;
> +extern const AVHWAccel ff_h264_d3d12va_hwaccel;
> extern const AVHWAccel ff_h264_dxva2_hwaccel;
> extern const AVHWAccel ff_h264_nvdec_hwaccel;
> extern const AVHWAccel ff_h264_vaapi_hwaccel;
> diff --git a/libavcodec/hwconfig.h b/libavcodec/hwconfig.h
> index e8c6186151..e20118c096 100644
> --- a/libavcodec/hwconfig.h
> +++ b/libavcodec/hwconfig.h
> @@ -82,6 +82,8 @@ void ff_hwaccel_uninit(AVCodecContext *avctx);
>     HW_CONFIG_HWACCEL(1, 1, 1, VULKAN, VULKAN, ff_ ## codec ## _vulkan_hwaccel)
> #define HWACCEL_D3D11VA(codec) \
>     HW_CONFIG_HWACCEL(0, 0, 1, D3D11VA_VLD, NONE, ff_ ## codec ## _d3d11va_hwaccel)
> +#define HWACCEL_D3D12VA(codec) \
> +    HW_CONFIG_HWACCEL(1, 1, 0, D3D12, D3D12VA, ff_ ## codec ## _d3d12va_hwaccel)
>
> #define HW_CONFIG_ENCODER(device, frames, ad_hoc, format, device_type_) \
> &(const AVCodecHWConfigInternal) { \