[FFmpeg-devel] [PATCH v3 2/9] avcodec: add D3D12VA hardware accelerated H264 decoding

Wu, Tong1 tong1.wu at intel.com
Wed Jul 12 13:17:58 EEST 2023


>On Vr, 2023-06-02 at 16:06 +0800, Tong Wu wrote:
>> From: Wu Jianhua <toqsxw at outlook.com>
>>
>> The implementation is based on:
>> https://learn.microsoft.com/en-us/windows/win32/medfound/direct3d-12-
>video-overview
>>
>> With the Direct3D 12 video decoding support, we can render or process
>> the decoded images by the pixel shaders or compute shaders directly
>> without the extra copy overhead, which is beneficial especially if you
>> are trying to render or post-process a 4K or 8K video.
>>
>> The command below is how to enable d3d12va:
>> ffmpeg -hwaccel d3d12va -i input.mp4 output.mp4
>>
>> Signed-off-by: Wu Jianhua <toqsxw at outlook.com>
>> Signed-off-by: Tong Wu <tong1.wu at intel.com>
>> ---
>>  configure                   |   2 +
>>  libavcodec/Makefile         |   3 +
>>  libavcodec/d3d11va.h        |   3 -
>>  libavcodec/d3d12va.c        | 552 ++++++++++++++++++++++++++++++++++++
>>  libavcodec/d3d12va.h        | 184 ++++++++++++
>>  libavcodec/d3d12va_h264.c   | 210 ++++++++++++++
>>  libavcodec/dxva2.c          |  24 ++
>>  libavcodec/dxva2.h          |   3 -
>>  libavcodec/dxva2_h264.c     |  12 +-
>>  libavcodec/dxva2_internal.h |  67 +++--
>>  libavcodec/h264_slice.c     |   4 +
>>  libavcodec/h264dec.c        |   3 +
>>  libavcodec/hwaccels.h       |   1 +
>>  libavcodec/hwconfig.h       |   2 +
>>  14 files changed, 1028 insertions(+), 42 deletions(-)
>>  create mode 100644 libavcodec/d3d12va.c
>>  create mode 100644 libavcodec/d3d12va.h
>>  create mode 100644 libavcodec/d3d12va_h264.c
>>
>> diff --git a/configure b/configure
>> index b86064e36f..f5dad4653f 100755
>> --- a/configure
>> +++ b/configure
>> @@ -3033,6 +3033,8 @@ h264_d3d11va_hwaccel_deps="d3d11va"
>>  h264_d3d11va_hwaccel_select="h264_decoder"
>>  h264_d3d11va2_hwaccel_deps="d3d11va"
>>  h264_d3d11va2_hwaccel_select="h264_decoder"
>> +h264_d3d12va_hwaccel_deps="d3d12va"
>> +h264_d3d12va_hwaccel_select="h264_decoder"
>>  h264_dxva2_hwaccel_deps="dxva2"
>>  h264_dxva2_hwaccel_select="h264_decoder"
>>  h264_nvdec_hwaccel_deps="nvdec"
>> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
>> index 9aacc1d477..ae143d8821 100644
>> --- a/libavcodec/Makefile
>> +++ b/libavcodec/Makefile
>> @@ -977,6 +977,7 @@ OBJS-$(CONFIG_ADPCM_ZORK_DECODER)         +=
>adpcm.o
>> adpcm_data.o
>>
>>  # hardware accelerators
>>  OBJS-$(CONFIG_D3D11VA)                    += dxva2.o
>> +OBJS-$(CONFIG_D3D12VA)                    += dxva2.o d3d12va.o
>>  OBJS-$(CONFIG_DXVA2)                      += dxva2.o
>>  OBJS-$(CONFIG_NVDEC)                      += nvdec.o
>>  OBJS-$(CONFIG_VAAPI)                      += vaapi_decode.o
>> @@ -994,6 +995,7 @@ OBJS-$(CONFIG_H263_VAAPI_HWACCEL)         +=
>vaapi_mpeg4.o
>>  OBJS-$(CONFIG_H263_VIDEOTOOLBOX_HWACCEL)  += videotoolbox.o
>>  OBJS-$(CONFIG_H264_D3D11VA_HWACCEL)       += dxva2_h264.o
>>  OBJS-$(CONFIG_H264_DXVA2_HWACCEL)         += dxva2_h264.o
>> +OBJS-$(CONFIG_H264_D3D12VA_HWACCEL)       += dxva2_h264.o
>d3d12va_h264.o
>>  OBJS-$(CONFIG_H264_NVDEC_HWACCEL)         += nvdec_h264.o
>>  OBJS-$(CONFIG_H264_QSV_HWACCEL)           += qsvdec.o
>>  OBJS-$(CONFIG_H264_VAAPI_HWACCEL)         += vaapi_h264.o
>> @@ -1277,6 +1279,7 @@ SKIPHEADERS                            +=
>> %_tablegen.h                  \
>>
>>  SKIPHEADERS-$(CONFIG_AMF)              += amfenc.h
>>  SKIPHEADERS-$(CONFIG_D3D11VA)          += d3d11va.h dxva2_internal.h
>> +SKIPHEADERS-$(CONFIG_D3D12VA)          += d3d12va.h
>>  SKIPHEADERS-$(CONFIG_DXVA2)            += dxva2.h dxva2_internal.h
>>  SKIPHEADERS-$(CONFIG_JNI)              += ffjni.h
>>  SKIPHEADERS-$(CONFIG_LCMS2)            += fflcms2.h
>> diff --git a/libavcodec/d3d11va.h b/libavcodec/d3d11va.h
>> index 6816b6c1e6..27f40e5519 100644
>> --- a/libavcodec/d3d11va.h
>> +++ b/libavcodec/d3d11va.h
>> @@ -45,9 +45,6 @@
>>   * @{
>>   */
>>
>> -#define FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG 1 ///< Work
>around for
>> Direct3D11 and old UVD/UVD+ ATI video cards
>> -#define FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO    2 ///< Work
>around for
>> Direct3D11 and old Intel GPUs with ClearVideo interface
>> -
>>  /**
>>   * This structure is used to provides the necessary configurations and data
>>   * to the Direct3D11 FFmpeg HWAccel implementation.
>> diff --git a/libavcodec/d3d12va.c b/libavcodec/d3d12va.c
>> new file mode 100644
>> index 0000000000..7f1fab7251
>> --- /dev/null
>> +++ b/libavcodec/d3d12va.c
>> @@ -0,0 +1,552 @@
>> +/*
>> + * Direct3D 12 HW acceleration video decoder
>> + *
>> + * copyright (c) 2022-2023 Wu Jianhua <toqsxw at outlook.com>
>> + *
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
>> USA
>> + */
>> +
>> +#include <assert.h>
>> +#include <string.h>
>> +#include <initguid.h>
>> +
>> +#include "libavutil/common.h"
>> +#include "libavutil/log.h"
>> +#include "libavutil/time.h"
>> +#include "libavutil/imgutils.h"
>> +#include "libavutil/hwcontext_d3d12va_internal.h"
>> +#include "libavutil/hwcontext_d3d12va.h"
>> +#include "avcodec.h"
>> +#include "decode.h"
>> +#include "d3d12va.h"
>> +
>> +typedef struct CommandAllocator {
>> +    ID3D12CommandAllocator *command_allocator;
>> +    uint64_t fence_value;
>> +} CommandAllocator;
>> +
>> +int ff_d3d12va_get_suitable_max_bitstream_size(AVCodecContext *avctx)
>> +{
>> +    AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx);
>> +    return av_image_get_buffer_size(frames_ctx->sw_format, avctx-
>> >coded_width, avctx->coded_height, 1);
>> +}
>> +
>> +static int d3d12va_get_valid_command_allocator(AVCodecContext *avctx,
>> ID3D12CommandAllocator **ppAllocator)
>> +{
>> +    HRESULT hr;
>> +    D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
>> +    CommandAllocator allocator;
>> +
>> +    if (av_fifo_peek(ctx->allocator_queue, &allocator, 1, 0) >= 0) {
>> +        uint64_t completion = ID3D12Fence_GetCompletedValue(ctx-
>>sync_ctx-
>> >fence);
>> +        if (completion >= allocator.fence_value) {
>> +            *ppAllocator = allocator.command_allocator;
>> +            av_fifo_read(ctx->allocator_queue, &allocator, 1);
>> +            return 0;
>> +        }
>> +    }
>> +
>> +    hr = ID3D12Device_CreateCommandAllocator(ctx->device_ctx->device,
>> D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE,
>> +        &IID_ID3D12CommandAllocator, ppAllocator);
>> +    if (FAILED(hr)) {
>> +        av_log(avctx, AV_LOG_ERROR, "Failed to create a new command
>> allocator!\n");
>> +        return AVERROR(EINVAL);
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int d3d12va_discard_command_allocator(AVCodecContext *avctx,
>> ID3D12CommandAllocator *pAllocator, uint64_t fence_value)
>> +{
>> +    D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
>> +
>> +    CommandAllocator allocator = {
>> +        .command_allocator = pAllocator,
>> +        .fence_value = fence_value
>> +    };
>> +
>> +    if (av_fifo_write(ctx->allocator_queue, &allocator, 1) < 0) {
>> +        D3D12_OBJECT_RELEASE(pAllocator);
>> +        return AVERROR(ENOMEM);
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static void bufref_free_interface(void *opaque, uint8_t *data)
>> +{
>> +    D3D12_OBJECT_RELEASE(opaque);
>> +}
>> +
>> +static AVBufferRef *bufref_wrap_interface(IUnknown *iface)
>> +{
>> +    return av_buffer_create((uint8_t*)iface, 1, bufref_free_interface, iface,
>> 0);
>> +}
>> +
>> +static int d3d12va_create_buffer(AVCodecContext *avctx, UINT size,
>> ID3D12Resource **ppResouce)
>> +{
>> +    D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
>> +
>> +    D3D12_HEAP_PROPERTIES heap_props = { .Type =
>D3D12_HEAP_TYPE_UPLOAD };
>> +
>> +    D3D12_RESOURCE_DESC desc = {
>> +        .Dimension        = D3D12_RESOURCE_DIMENSION_BUFFER,
>> +        .Alignment        =
>D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
>> +        .Width            = size,
>> +        .Height           = 1,
>> +        .DepthOrArraySize = 1,
>> +        .MipLevels        = 1,
>> +        .Format           = DXGI_FORMAT_UNKNOWN,
>> +        .SampleDesc       = { .Count = 1, .Quality = 0 },
>> +        .Layout           = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
>> +        .Flags            = D3D12_RESOURCE_FLAG_NONE,
>> +    };
>> +
>> +    HRESULT hr = ID3D12Device_CreateCommittedResource(ctx->device_ctx-
>> >device, &heap_props, D3D12_HEAP_FLAG_NONE,
>> +        &desc, D3D12_RESOURCE_STATE_GENERIC_READ, NULL,
>&IID_ID3D12Resource,
>> ppResouce);
>> +
>> +    if (FAILED(hr)) {
>> +        av_log(avctx, AV_LOG_ERROR, "Failed to create d3d12 buffer.\n");
>> +        return  AVERROR(EINVAL);
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int d3d12va_wait_for_gpu(AVCodecContext *avctx)
>> +{
>> +    D3D12VADecodeContext *ctx      = D3D12VA_DECODE_CONTEXT(avctx);
>> +    AVD3D12VASyncContext *sync_ctx = ctx->sync_ctx;
>> +
>> +    return av_d3d12va_wait_queue_idle(sync_ctx, ctx->command_queue);
>> +}
>> +
>> +static int d3d12va_create_decoder_heap(AVCodecContext *avctx)
>> +{
>> +    D3D12VADecodeContext   *ctx        = D3D12VA_DECODE_CONTEXT(avctx);
>> +    AVHWFramesContext      *frames_ctx =
>D3D12VA_FRAMES_CONTEXT(avctx);
>> +    AVD3D12VADeviceContext *hwctx      = ctx->device_ctx;
>> +
>> +    D3D12_VIDEO_DECODER_HEAP_DESC desc = {
>> +        .NodeMask      = 0,
>> +        .Configuration = ctx->cfg,
>> +        .DecodeWidth   = frames_ctx->width,
>> +        .DecodeHeight  = frames_ctx->height,
>> +        .Format        = av_d3d12va_map_sw_to_hw_format(frames_ctx-
>> >sw_format),
>> +        .FrameRate     = { avctx->framerate.num, avctx->framerate.den },
>> +        .BitRate       = avctx->bit_rate,
>> +        .MaxDecodePictureBufferCount = frames_ctx->initial_pool_size,
>> +    };
>> +
>> +    DX_CHECK(ID3D12VideoDevice_CreateVideoDecoderHeap(hwctx-
>>video_device,
>> &desc,
>> +        &IID_ID3D12VideoDecoderHeap, &ctx->decoder_heap));
>> +
>> +    return 0;
>> +
>> +fail:
>> +    if (ctx->decoder) {
>> +        av_log(avctx, AV_LOG_ERROR, "D3D12 doesn't support decoding
>frames
>> with an extent "
>> +            "[width(%d), height(%d)], on your device!\n", frames_ctx->width,
>> frames_ctx->height);
>> +    }
>> +
>> +    return AVERROR(EINVAL);
>> +}
>> +
>> +static int d3d12va_create_decoder(AVCodecContext *avctx)
>> +{
>> +    D3D12_VIDEO_DECODER_DESC desc;
>> +    D3D12VADecodeContext   *ctx        = D3D12VA_DECODE_CONTEXT(avctx);
>> +    AVHWFramesContext      *frames_ctx =
>D3D12VA_FRAMES_CONTEXT(avctx);
>> +    AVD3D12VADeviceContext *hwctx      = ctx->device_ctx;
>> +
>> +    D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT feature = {
>> +        .NodeIndex     = 0,
>> +        .Configuration = ctx->cfg,
>> +        .Width         = frames_ctx->width,
>> +        .Height        = frames_ctx->height,
>> +        .DecodeFormat  = av_d3d12va_map_sw_to_hw_format(frames_ctx-
>> >sw_format),
>> +        .FrameRate     = { avctx->framerate.num, avctx->framerate.den },
>> +        .BitRate       = avctx->bit_rate,
>> +    };
>> +
>> +    DX_CHECK(ID3D12VideoDevice_CheckFeatureSupport(hwctx-
>>video_device,
>> D3D12_FEATURE_VIDEO_DECODE_SUPPORT, &feature, sizeof(feature)));
>> +    if (!(feature.SupportFlags &
>D3D12_VIDEO_DECODE_SUPPORT_FLAG_SUPPORTED)
>> ||
>> +        !(feature.DecodeTier >= D3D12_VIDEO_DECODE_TIER_2)) {
>> +        av_log(avctx, AV_LOG_ERROR, "D3D12 decoder doesn't support on
>this
>> device\n");
>> +        return AVERROR(EINVAL);
>> +    }
>> +
>> +    desc = (D3D12_VIDEO_DECODER_DESC) {
>> +        .NodeMask = 0,
>> +        .Configuration = ctx->cfg,
>> +    };
>> +
>> +    DX_CHECK(ID3D12VideoDevice_CreateVideoDecoder(hwctx-
>>video_device, &desc,
>> &IID_ID3D12VideoDecoder, &ctx->decoder));
>> +
>> +    ctx->decoder_ref = bufref_wrap_interface((IUnknown *)ctx->decoder);
>> +    if (!ctx->decoder_ref)
>> +        return AVERROR(ENOMEM);
>> +
>> +    return 0;
>> +
>> +fail:
>> +    return AVERROR(EINVAL);
>> +}
>> +
>> +static inline int d3d12va_get_num_surfaces(enum AVCodecID codec_id)
>> +{
>> +    int num_surfaces = 1;
>> +    switch (codec_id) {
>> +    case AV_CODEC_ID_H264:
>> +    case AV_CODEC_ID_HEVC:
>> +        num_surfaces += 16;
>> +        break;
>> +
>> +    case AV_CODEC_ID_AV1:
>> +        num_surfaces += 12;
>> +        break;
>> +
>> +    case AV_CODEC_ID_VP9:
>> +        num_surfaces += 8;
>> +        break;
>> +
>> +    default:
>> +        num_surfaces += 2;
>> +    }
>> +
>> +    return num_surfaces;
>> +}
>> +
>> +int ff_d3d12va_common_frame_params(AVCodecContext *avctx,
>AVBufferRef
>> *hw_frames_ctx)
>> +{
>> +    AVHWFramesContext      *frames_ctx   = (AVHWFramesContext
>> *)hw_frames_ctx->data;
>> +    AVHWDeviceContext      *device_ctx   = frames_ctx->device_ctx;
>> +    AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx;
>> +
>> +    frames_ctx->format    = AV_PIX_FMT_D3D12;
>> +    frames_ctx->sw_format = avctx->sw_pix_fmt ==
>AV_PIX_FMT_YUV420P10 ?
>> AV_PIX_FMT_P010 : AV_PIX_FMT_NV12;
>> +    frames_ctx->width     = avctx->width;
>> +    frames_ctx->height    = avctx->height;
>> +
>> +    frames_ctx->initial_pool_size = d3d12va_get_num_surfaces(avctx-
>> >codec_id);
>> +
>> +    return 0;
>> +}
>> +
>> +int ff_d3d12va_decode_init(AVCodecContext *avctx)
>> +{
>> +    int ret;
>> +    UINT bitstream_size;
>> +    AVHWFramesContext *frames_ctx;
>> +    D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
>> +
>> +    ID3D12CommandAllocator *command_allocator = NULL;
>> +    D3D12_COMMAND_QUEUE_DESC queue_desc = {
>> +        .Type     = D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE,
>> +        .Priority = 0,
>> +        .Flags    = D3D12_COMMAND_QUEUE_FLAG_NONE,
>> +        .NodeMask = 0
>> +    };
>> +
>> +    ctx->pix_fmt = avctx->hwaccel->pix_fmt;
>> +
>> +    ret = ff_decode_get_hw_frames_ctx(avctx,
>AV_HWDEVICE_TYPE_D3D12VA);
>> +    if (ret < 0)
>> +        return ret;
>> +
>> +    frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx);
>> +    ctx->device_ctx = (AVD3D12VADeviceContext *)frames_ctx->device_ctx-
>> >hwctx;
>> +
>> +    if (frames_ctx->format != ctx->pix_fmt) {
>> +        av_log(avctx, AV_LOG_ERROR, "Invalid pixfmt for hwaccel!\n");
>> +        goto fail;
>> +    }
>> +
>> +    ret = d3d12va_create_decoder(avctx);
>> +    if (ret < 0)
>> +        goto fail;
>> +
>> +    ret = d3d12va_create_decoder_heap(avctx);
>> +    if (ret < 0)
>> +        goto fail;
>> +
>> +    ctx->max_num_ref = frames_ctx->initial_pool_size;
>> +
>> +    bitstream_size = ff_d3d12va_get_suitable_max_bitstream_size(avctx);
>> +    ctx->buffers = av_calloc(sizeof(ID3D12Resource *), ctx->max_num_ref);
>> +    for (int i = 0; i < ctx->max_num_ref; i++) {
>> +        ret = d3d12va_create_buffer(avctx, bitstream_size, &ctx->buffers[i]);
>> +        if (ret < 0)
>> +            goto fail;
>> +    }
>> +
>> +    ctx->ref_resources = av_calloc(sizeof(ID3D12Resource *), ctx-
>> >max_num_ref);
>> +    if (!ctx->ref_resources)
>> +        return AVERROR(ENOMEM);
>> +
>> +    ctx->ref_subresources = av_calloc(sizeof(UINT), ctx->max_num_ref);
>> +    if (!ctx->ref_subresources)
>> +        return AVERROR(ENOMEM);
>> +
>> +    ctx->allocator_queue = av_fifo_alloc2(ctx->max_num_ref,
>> sizeof(CommandAllocator), AV_FIFO_FLAG_AUTO_GROW);
>> +    if (!ctx->allocator_queue)
>> +        return AVERROR(ENOMEM);
>> +
>> +    ret = av_d3d12va_sync_context_alloc(ctx->device_ctx, &ctx->sync_ctx);
>> +    if (ret < 0)
>> +        goto fail;
>> +
>> +    ret = d3d12va_get_valid_command_allocator(avctx,
>&command_allocator);
>> +    if (ret < 0)
>> +        goto fail;
>> +
>> +    DX_CHECK(ID3D12Device_CreateCommandQueue(ctx->device_ctx-
>>device,
>> &queue_desc,
>> +        &IID_ID3D12CommandQueue, &ctx->command_queue));
>> +
>> +    DX_CHECK(ID3D12Device_CreateCommandList(ctx->device_ctx->device,
>0,
>> queue_desc.Type,
>> +        command_allocator, NULL, &IID_ID3D12CommandList, &ctx-
>> >command_list));
>> +
>> +    DX_CHECK(ID3D12VideoDecodeCommandList_Close(ctx-
>>command_list));
>> +
>> +    ID3D12CommandQueue_ExecuteCommandLists(ctx->command_queue,
>1,
>> (ID3D12CommandList **)&ctx->command_list);
>> +
>> +    d3d12va_wait_for_gpu(avctx);
>> +
>> +    d3d12va_discard_command_allocator(avctx, command_allocator, ctx-
>> >sync_ctx->fence_value);
>> +
>> +    return 0;
>> +
>> +fail:
>> +    D3D12_OBJECT_RELEASE(command_allocator);
>> +    ff_d3d12va_decode_uninit(avctx);
>> +
>> +    return AVERROR(EINVAL);
>> +}
>> +
>> +int ff_d3d12va_decode_uninit(AVCodecContext *avctx)
>> +{
>> +    int i, num_allocator = 0;
>> +    D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
>> +    CommandAllocator allocator;
>> +
>> +    if (ctx->sync_ctx)
>> +        d3d12va_wait_for_gpu(avctx);
>> +
>> +    av_freep(&ctx->ref_resources);
>> +
>> +    av_freep(&ctx->ref_subresources);
>> +
>> +    for (i = 0; i < ctx->max_num_ref; i++)
>> +        D3D12_OBJECT_RELEASE(ctx->buffers[i]);
>> +
>> +    av_freep(&ctx->buffers);
>> +
>> +    D3D12_OBJECT_RELEASE(ctx->command_list);
>> +
>> +    D3D12_OBJECT_RELEASE(ctx->command_queue);
>> +
>> +    if (ctx->allocator_queue) {
>> +        while (av_fifo_read(ctx->allocator_queue, &allocator, 1) >= 0) {
>> +            num_allocator++;
>> +            D3D12_OBJECT_RELEASE(allocator.command_allocator);
>> +        }
>> +
>> +        av_log(avctx, AV_LOG_VERBOSE, "Total number of command
>allocators
>> reused: %d\n", num_allocator);
>> +    }
>> +
>> +    av_fifo_freep2(&ctx->allocator_queue);
>> +
>> +    av_d3d12va_sync_context_free(&ctx->sync_ctx);
>> +
>> +    D3D12_OBJECT_RELEASE(ctx->decoder_heap);
>> +
>> +    av_buffer_unref(&ctx->decoder_ref);
>> +
>> +    return 0;
>> +}
>> +
>> +static ID3D12Resource *get_surface(const AVFrame *frame)
>> +{
>> +    return (ID3D12Resource *)frame->data[0];
>> +}
>> +
>> +intptr_t ff_d3d12va_get_surface_index(AVCodecContext *ctx, const
>AVFrame*
>> frame)
>> +{
>> +    return (intptr_t)frame->data[1];
>> +}
>> +
>> +static AVD3D12VASyncContext *d3d12va_get_sync_context(const AVFrame
>*frame)
>> +{
>> +    return (AVD3D12VASyncContext *)frame->data[2];
>> +}
>> +
>> +static int d3d12va_begin_update_reference_frames(AVCodecContext
>*avctx,
>> D3D12_RESOURCE_BARRIER *barriers, int index)
>> +{
>> +    D3D12VADecodeContext   *ctx          =
>D3D12VA_DECODE_CONTEXT(avctx);
>> +    AVHWFramesContext      *frames_ctx   =
>D3D12VA_FRAMES_CONTEXT(avctx);
>> +    AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx;
>> +
>> +    int num_barrier = 0;
>> +
>> +    for (int i = 0; i < ctx->max_num_ref; i++) {
>> +        if (ctx->ref_resources[i] && ctx->ref_resources[i] != frames_hwctx-
>> >texture_infos[index].texture) {
>> +            barriers[num_barrier].Type  =
>> D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
>> +            barriers[num_barrier].Flags =
>D3D12_RESOURCE_BARRIER_FLAG_NONE;
>> +            barriers[num_barrier].Transition =
>> (D3D12_RESOURCE_TRANSITION_BARRIER){
>> +                .pResource   = ctx->ref_resources[i],
>> +                .Subresource = 0,
>> +                .StateBefore = D3D12_RESOURCE_STATE_COMMON,
>> +                .StateAfter  = D3D12_RESOURCE_STATE_VIDEO_DECODE_READ,
>> +            };
>> +            num_barrier++;
>> +        }
>> +    }
>> +
>> +    return num_barrier;
>> +}
>> +
>> +static void d3d12va_end_update_reference_frames(AVCodecContext
>*avctx,
>> D3D12_RESOURCE_BARRIER *barriers, int index)
>> +{
>> +    D3D12VADecodeContext   *ctx          =
>D3D12VA_DECODE_CONTEXT(avctx);
>> +    AVHWFramesContext      *frames_ctx   =
>D3D12VA_FRAMES_CONTEXT(avctx);
>> +    AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx;
>> +    int num_barrier = 0;
>> +
>> +    for (int i = 0; i < ctx->max_num_ref; i++) {
>> +        if (ctx->ref_resources[i] && ctx->ref_resources[i] != frames_hwctx-
>> >texture_infos[index].texture) {
>> +            barriers[num_barrier].Transition.pResource = ctx-
>> >ref_resources[i];
>> +            barriers[num_barrier].Flags =
>D3D12_RESOURCE_BARRIER_FLAG_NONE;
>> +            barriers[num_barrier].Transition.StateBefore =
>> D3D12_RESOURCE_STATE_VIDEO_DECODE_READ;
>> +            barriers[num_barrier].Transition.StateAfter =
>> D3D12_RESOURCE_STATE_COMMON;
>> +            num_barrier++;
>> +        }
>> +    }
>> +}
>> +
>> +int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame
>*frame,
>> +                              const void *pp, unsigned pp_size,
>> +                              const void *qm, unsigned qm_size,
>> +                              int(*update_input_arguments)(AVCodecContext *,
>> D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS *, ID3D12Resource
>*))
>> +{
>> +    int ret;
>> +    D3D12VADecodeContext   *ctx               =
>> D3D12VA_DECODE_CONTEXT(avctx);
>> +    AVHWFramesContext      *frames_ctx        =
>> D3D12VA_FRAMES_CONTEXT(avctx);
>> +    AVD3D12VAFramesContext *frames_hwctx      = frames_ctx->hwctx;
>> +    ID3D12CommandAllocator *command_allocator = NULL;
>> +
>> +    ID3D12Resource *resource = get_surface(frame);
>> +    UINT index = ff_d3d12va_get_surface_index(avctx, frame);
>> +    AVD3D12VASyncContext *sync_ctx = d3d12va_get_sync_context(frame);
>> +
>> +    ID3D12VideoDecodeCommandList *cmd_list = ctx->command_list;
>> +    D3D12_RESOURCE_BARRIER barriers[D3D12VA_MAX_SURFACES] = { 0 };
>> +
>> +    D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS input_args = {
>> +        .NumFrameArguments = 2,
>> +        .FrameArguments = {
>> +            [0] = {
>> +                .Type  =
>D3D12_VIDEO_DECODE_ARGUMENT_TYPE_PICTURE_PARAMETERS,
>> +                .Size  = pp_size,
>> +                .pData = (void *)pp,
>> +            },
>> +            [1] = {
>> +                .Type  =
>>
>D3D12_VIDEO_DECODE_ARGUMENT_TYPE_INVERSE_QUANTIZATION_MATRI
>X,
>> +                .Size  = qm_size,
>> +                .pData = (void *)qm,
>> +            },
>> +        },
>> +        .pHeap = ctx->decoder_heap,
>> +    };
>> +
>> +    D3D12_VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS output_args = {
>> +        .ConversionArguments = 0,
>> +        .OutputSubresource   = 0,
>> +        .pOutputTexture2D    = resource,
>> +    };
>> +
>> +    UINT num_barrier = 1;
>> +    barriers[0] = (D3D12_RESOURCE_BARRIER) {
>> +        .Type  = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
>> +        .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
>> +        .Transition = {
>> +            .pResource   = resource,
>> +            .Subresource = 0,
>> +            .StateBefore = D3D12_RESOURCE_STATE_COMMON,
>> +            .StateAfter  = D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,
>> +        },
>> +    };
>> +
>> +    memset(ctx->ref_resources, 0, sizeof(ID3D12Resource *) * ctx-
>> >max_num_ref);
>> +    memset(ctx->ref_subresources, 0, sizeof(UINT) * ctx->max_num_ref);
>> +    input_args.ReferenceFrames.NumTexture2Ds = ctx->max_num_ref;
>> +    input_args.ReferenceFrames.ppTexture2Ds  = ctx->ref_resources;
>> +    input_args.ReferenceFrames.pSubresources = ctx->ref_subresources;
>> +
>> +    av_d3d12va_wait_idle(sync_ctx);
>> +
>> +    if (!qm)
>> +        input_args.NumFrameArguments = 1;
>> +
>> +    ret = update_input_arguments(avctx, &input_args, ctx->buffers[index]);
>> +    if (ret < 0)
>> +        return ret;
>> +
>> +    ret = d3d12va_get_valid_command_allocator(avctx,
>&command_allocator);
>> +    if (ret < 0)
>> +        goto fail;
>> +
>> +    DX_CHECK(ID3D12CommandAllocator_Reset(command_allocator));
>> +
>> +    DX_CHECK(ID3D12VideoDecodeCommandList_Reset(cmd_list,
>> command_allocator));
>> +
>> +    num_barrier += d3d12va_begin_update_reference_frames(avctx,
>&barriers[1],
>> index);
>> +
>> +    ID3D12VideoDecodeCommandList_ResourceBarrier(cmd_list,
>num_barrier,
>> barriers);
>> +
>> +    ID3D12VideoDecodeCommandList_DecodeFrame(cmd_list, ctx->decoder,
>> &output_args, &input_args);
>> +
>> +    barriers[0].Transition.StateBefore = barriers[0].Transition.StateAfter;
>> +    barriers[0].Transition.StateAfter  = D3D12_RESOURCE_STATE_COMMON;
>> +    d3d12va_end_update_reference_frames(avctx, &barriers[1], index);
>> +
>> +    ID3D12VideoDecodeCommandList_ResourceBarrier(cmd_list,
>num_barrier,
>> barriers);
>> +
>> +    DX_CHECK(ID3D12VideoDecodeCommandList_Close(cmd_list));
>> +
>> +    ID3D12CommandQueue_ExecuteCommandLists(ctx->command_queue,
>1,
>> (ID3D12CommandList **)&ctx->command_list);
>> +
>> +    DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue,
>sync_ctx->fence,
>> ++sync_ctx->fence_value));
>> +
>> +    DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, ctx-
>>sync_ctx-
>> >fence, ++ctx->sync_ctx->fence_value));
>> +
>> +    ret = d3d12va_discard_command_allocator(avctx, command_allocator,
>ctx-
>> >sync_ctx->fence_value);
>> +    if (ret < 0)
>> +        return ret;
>> +
>> +    if (ctx->device_ctx->sync) {
>> +        ret = av_d3d12va_wait_idle(ctx->sync_ctx);
>> +        if (ret < 0)
>> +            return ret;
>> +    }
>> +
>> +    return 0;
>> +
>> +fail:
>> +    if (command_allocator)
>> +        d3d12va_discard_command_allocator(avctx, command_allocator, ctx-
>> >sync_ctx->fence_value);
>> +    return AVERROR(EINVAL);
>> +}
>> diff --git a/libavcodec/d3d12va.h b/libavcodec/d3d12va.h
>> new file mode 100644
>> index 0000000000..da3e7b7ab9
>> --- /dev/null
>> +++ b/libavcodec/d3d12va.h
>> @@ -0,0 +1,184 @@
>> +/*
>> + * Direct3D 12 HW acceleration video decoder
>> + *
>> + * copyright (c) 2022-2023 Wu Jianhua <toqsxw at outlook.com>
>> + *
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
>> USA
>> + */
>> +
>> +#ifndef AVCODEC_D3D12VA_H
>> +#define AVCODEC_D3D12VA_H
>> +
>> +#include "libavutil/fifo.h"
>> +#include "libavutil/hwcontext.h"
>> +#include "libavutil/hwcontext_d3d12va.h"
>> +#include "avcodec.h"
>> +#include "internal.h"
>> +
>> +/**
>> + * @brief This structure is used to provides the necessary configurations
>and
>> data
>> + * to the FFmpeg Direct3D 12 HWAccel implementation for video decoder.
>> + *
>> + * The application must make it available as
>AVCodecContext.hwaccel_context.
>> + */
>
>Does d3d12va support AVCodecContext.hwaccel_context ? If yes, you should
>make
>this file public, modify the structures and remove all ff_ functions, otherwise
>user can't use these structures in an application. If no, please remove the
>above comment and better to rename this file to d3d12va_decode.h because
>the
>structures and functions in this file are for d3d12va decoders.
>
>Thanks
>Haihao


Makes sense. Will remove the comment and rename the files in V4. Thanks.



>
>> +typedef struct D3D12VADecodeContext {
>> +    AVBufferRef *decoder_ref;
>> +
>> +    /**
>> +     * D3D12 video decoder
>> +     */
>> +    ID3D12VideoDecoder *decoder;
>> +
>> +    /**
>> +     * D3D12 video decoder heap
>> +     */
>> +    ID3D12VideoDecoderHeap *decoder_heap;
>> +
>> +    /**
>> +     * D3D12 configuration used to create the decoder
>> +     *
>> +     * Specified by decoders
>> +     */
>> +    D3D12_VIDEO_DECODE_CONFIGURATION cfg;
>> +
>> +    /**
>> +     * A cached queue for reusing the D3D12 command allocators
>> +     *
>> +     * @see
>> https://learn.microsoft.com/en-us/windows/win32/direct3d12/recording-
>command-lists-and-bundles#id3d12commandallocator
>> +     */
>> +    AVFifo *allocator_queue;
>> +
>> +    /**
>> +     * D3D12 command queue
>> +     */
>> +    ID3D12CommandQueue *command_queue;
>> +
>> +    /**
>> +     * D3D12 video decode command list
>> +     */
>> +    ID3D12VideoDecodeCommandList *command_list;
>> +
>> +    /**
>> +     * The array of buffer resources used to upload compressed bitstream
>> +     *
>> +     * The buffers.length is the same as
>D3D12VADecodeContext.max_num_ref
>> +     */
>> +    ID3D12Resource **buffers;
>> +
>> +    /**
>> +     * The array of resources used for reference frames
>> +     *
>> +     * The ref_resources.length is the same as
>> D3D12VADecodeContext.max_num_ref
>> +     */
>> +    ID3D12Resource **ref_resources;
>> +
>> +    /**
>> +     * The array of subresources used for reference frames
>> +     *
>> +     * The ref_subresources.length is the same as
>> D3D12VADecodeContext.max_num_ref
>> +     */
>> +    UINT *ref_subresources;
>> +
>> +    /**
>> +     * Maximum number of reference frames
>> +     */
>> +    UINT max_num_ref;
>> +
>> +    /**
>> +     * The sync context used to sync command queue
>> +     */
>> +    AVD3D12VASyncContext *sync_ctx;
>> +
>> +    /**
>> +     * A pointer to AVD3D12VADeviceContext used to create D3D12 objects
>> +     */
>> +    AVD3D12VADeviceContext *device_ctx;
>> +
>> +    /**
>> +     * Pixel format
>> +     */
>> +    enum AVPixelFormat pix_fmt;
>> +
>> +    /**
>> +     * Private to the FFmpeg AVHWAccel implementation
>> +     */
>> +    unsigned report_id;
>> +} D3D12VADecodeContext;
>> +
>> +/**
>> + * @}
>> + */
>> +
>> +#define D3D12VA_DECODE_CONTEXT(avctx) ((D3D12VADecodeContext
>*)((avctx)-
>> >internal->hwaccel_priv_data))
>> +#define D3D12VA_FRAMES_CONTEXT(avctx) ((AVHWFramesContext
>*)(avctx)-
>> >hw_frames_ctx->data)
>> +
>> +/**
>> + * @brief Get a suitable maximum bitstream size
>> + *
>> + * Creating and destroying a resource on d3d12 needs sync and reallocation,
>> so use this function
>> + * to help allocate a big enough bitstream buffer to avoid recreating
>> resources when decoding.
>> + *
>> + * @return the suitable size
>> + */
>> +int ff_d3d12va_get_suitable_max_bitstream_size(AVCodecContext *avctx);
>> +
>> +/**
>> + * @brief init D3D12VADecodeContext
>> + *
>> + * @return Error code (ret < 0 if failed)
>> + */
>> +int ff_d3d12va_decode_init(AVCodecContext *avctx);
>> +
>> +/**
>> + * @brief uninit D3D12VADecodeContext
>> + *
>> + * @return Error code (ret < 0 if failed)
>> + */
>> +int ff_d3d12va_decode_uninit(AVCodecContext *avctx);
>> +
>> +/**
>> + * @brief d3d12va common frame params
>> + *
>> + * @return Error code (ret < 0 if failed)
>> + */
>> +int ff_d3d12va_common_frame_params(AVCodecContext *avctx,
>AVBufferRef
>> *hw_frames_ctx);
>> +
>> +/**
>> + * @brief d3d12va common end frame
>> + *
>> + * @param avctx    codec context
>> + * @param frame    current output frame
>> + * @param pp       picture parameters
>> + * @param pp_size  the size of the picture parameters
>> + * @param qm       quantization matrix
>> + * @param qm_size  the size of the quantization matrix
>> + * @param callback update decoder-specified input stream arguments
>> + * @return Error code (ret < 0 if failed)
>> + */
>> +int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame
>*frame,
>> +    const void *pp, unsigned pp_size,
>> +    const void *qm, unsigned qm_size,
>> +    int(*)(AVCodecContext *,
>D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS *,
>> ID3D12Resource *));
>> +
>> +/**
>> + * @brief get surface index
>> + *
>> + * @return index
>> + */
>> +intptr_t ff_d3d12va_get_surface_index(AVCodecContext *avctx, const
>AVFrame
>> *frame);
>> +
>> +#endif /* AVCODEC_D3D12VA_DEC_H */
>> diff --git a/libavcodec/d3d12va_h264.c b/libavcodec/d3d12va_h264.c
>> new file mode 100644
>> index 0000000000..0810a034b4
>> --- /dev/null
>> +++ b/libavcodec/d3d12va_h264.c
>> @@ -0,0 +1,210 @@
>> +/*
>> + * Direct3D 12 h264 HW acceleration
>> + *
>> + * copyright (c) 2022-2023 Wu Jianhua <toqsxw at outlook.com>
>> + *
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
>> USA
>> + */
>> +
>> +#include "config_components.h"
>> +#include "libavutil/avassert.h"
>> +#include "h264dec.h"
>> +#include "h264data.h"
>> +#include "h264_ps.h"
>> +#include "mpegutils.h"
>> +#include "dxva2_internal.h"
>> +#include "d3d12va.h"
>> +#include "libavutil/hwcontext_d3d12va_internal.h"
>> +#include <dxva.h>
>> +
>> +typedef struct H264DecodePictureContext {
>> +    DXVA_PicParams_H264   pp;
>> +    DXVA_Qmatrix_H264     qm;
>> +    unsigned              slice_count;
>> +    DXVA_Slice_H264_Short slice_short[MAX_SLICES];
>> +    const uint8_t         *bitstream;
>> +    unsigned              bitstream_size;
>> +} H264DecodePictureContext;
>> +
>> +static void fill_slice_short(DXVA_Slice_H264_Short *slice,
>> +                             unsigned position, unsigned size)
>> +{
>> +    memset(slice, 0, sizeof(*slice));
>> +    slice->BSNALunitDataLocation = position;
>> +    slice->SliceBytesInBuffer    = size;
>> +    slice->wBadSliceChopping     = 0;
>> +}
>> +
>> +static int d3d12va_h264_start_frame(AVCodecContext *avctx,
>> +                                  av_unused const uint8_t *buffer,
>> +                                  av_unused uint32_t size)
>> +{
>> +    const H264Context        *h       = avctx->priv_data;
>> +    H264DecodePictureContext *ctx_pic = h->cur_pic_ptr-
>> >hwaccel_picture_private;
>> +    D3D12VADecodeContext     *ctx     = D3D12VA_DECODE_CONTEXT(avctx);
>> +
>> +    if (!ctx)
>> +        return -1;
>> +
>> +    assert(ctx_pic);
>> +
>> +    ff_dxva2_h264_fill_picture_parameters(avctx, (AVDXVAContext *)ctx,
>> &ctx_pic->pp);
>> +
>> +    ff_dxva2_h264_fill_scaling_lists(avctx, (AVDXVAContext *)ctx, &ctx_pic-
>> >qm);
>> +
>> +    ctx_pic->slice_count    = 0;
>> +    ctx_pic->bitstream_size = 0;
>> +    ctx_pic->bitstream      = NULL;
>> +
>> +    return 0;
>> +}
>> +
>> +static int d3d12va_h264_decode_slice(AVCodecContext *avctx, const
>uint8_t
>> *buffer, uint32_t size)
>> +{
>> +    unsigned position;
>> +    const H264Context        *h               = avctx->priv_data;
>> +    const H264SliceContext   *sl              = &h->slice_ctx[0];
>> +    const H264Picture        *current_picture = h->cur_pic_ptr;
>> +    H264DecodePictureContext *ctx_pic         = current_picture-
>> >hwaccel_picture_private;
>> +
>> +    if (ctx_pic->slice_count >= MAX_SLICES)
>> +        return AVERROR(ERANGE);
>> +
>> +    if (!ctx_pic->bitstream)
>> +        ctx_pic->bitstream = buffer;
>> +    ctx_pic->bitstream_size += size;
>> +
>> +    position = buffer - ctx_pic->bitstream;
>> +    fill_slice_short(&ctx_pic->slice_short[ctx_pic->slice_count], position,
>> size);
>> +    ctx_pic->slice_count++;
>> +
>> +    if (sl->slice_type != AV_PICTURE_TYPE_I && sl->slice_type !=
>> AV_PICTURE_TYPE_SI)
>> +        ctx_pic->pp.wBitFields &= ~(1 << 15); /* Set IntraPicFlag to 0 */
>> +
>> +    return 0;
>> +}
>> +
>> +#define START_CODE 65536
>> +#define START_CODE_SIZE 3
>> +static int update_input_arguments(AVCodecContext *avctx,
>> D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS *input_args,
>ID3D12Resource *buffer)
>> +{
>> +    D3D12VADecodeContext     *ctx          =
>D3D12VA_DECODE_CONTEXT(avctx);
>> +    AVHWFramesContext        *frames_ctx   =
>D3D12VA_FRAMES_CONTEXT(avctx);
>> +    AVD3D12VAFramesContext   *frames_hwctx = frames_ctx->hwctx;
>> +
>> +    const H264Context        *h               = avctx->priv_data;
>> +    const H264Picture        *current_picture = h->cur_pic_ptr;
>> +    H264DecodePictureContext *ctx_pic         = current_picture-
>> >hwaccel_picture_private;
>> +
>> +    int i, index;
>> +    uint8_t *mapped_data, *mapped_ptr;
>> +    DXVA_Slice_H264_Short *slice;
>> +    D3D12_VIDEO_DECODE_FRAME_ARGUMENT *args;
>> +
>> +    if (FAILED(ID3D12Resource_Map(buffer, 0, NULL, &mapped_data))) {
>> +        av_log(avctx, AV_LOG_ERROR, "Failed to map D3D12 Buffer
>> resource!\n");
>> +        return AVERROR(EINVAL);
>> +    }
>> +
>> +    mapped_ptr = mapped_data;
>> +    for (i = 0; i < ctx_pic->slice_count; i++) {
>> +        UINT position, size;
>> +        slice = &ctx_pic->slice_short[i];
>> +
>> +        position = slice->BSNALunitDataLocation;
>> +        size     = slice->SliceBytesInBuffer;
>> +
>> +        slice->SliceBytesInBuffer += START_CODE_SIZE;
>> +        slice->BSNALunitDataLocation = mapped_ptr - mapped_data;
>> +
>> +        *(uint32_t *)mapped_ptr = START_CODE;
>> +        mapped_ptr += START_CODE_SIZE;
>> +
>> +        memcpy(mapped_ptr, &ctx_pic->bitstream[position], size);
>> +        mapped_ptr += size;
>> +    }
>> +
>> +    ID3D12Resource_Unmap(buffer, 0, NULL);
>> +
>> +    input_args->CompressedBitstream =
>> (D3D12_VIDEO_DECODE_COMPRESSED_BITSTREAM){
>> +        .pBuffer = buffer,
>> +        .Offset  = 0,
>> +        .Size    = mapped_ptr - mapped_data,
>> +    };
>> +
>> +    args = &input_args->FrameArguments[input_args-
>>NumFrameArguments++];
>> +    args->Type  =
>D3D12_VIDEO_DECODE_ARGUMENT_TYPE_SLICE_CONTROL;
>> +    args->Size  = sizeof(DXVA_Slice_H264_Short) * ctx_pic->slice_count;
>> +    args->pData = ctx_pic->slice_short;
>> +
>> +    index = ctx_pic->pp.CurrPic.Index7Bits;
>> +    ctx->ref_resources[index] = frames_hwctx->texture_infos[index].texture;
>> +    for (i = 0; i < FF_ARRAY_ELEMS(ctx_pic->pp.RefFrameList); i++) {
>> +        index = ctx_pic->pp.RefFrameList[i].Index7Bits;
>> +        if (index != 0x7f)
>> +            ctx->ref_resources[index] = frames_hwctx-
>> >texture_infos[index].texture;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int d3d12va_h264_end_frame(AVCodecContext *avctx)
>> +{
>> +    H264Context               *h       = avctx->priv_data;
>> +    H264DecodePictureContext  *ctx_pic = h->cur_pic_ptr-
>> >hwaccel_picture_private;
>> +    H264SliceContext          *sl      = &h->slice_ctx[0];
>> +
>> +    int ret;
>> +
>> +    if (ctx_pic->slice_count <= 0 || ctx_pic->bitstream_size <= 0)
>> +        return -1;
>> +
>> +    ret = ff_d3d12va_common_end_frame(avctx, h->cur_pic_ptr->f,
>> +                                      &ctx_pic->pp, sizeof(ctx_pic->pp),
>> +                                      &ctx_pic->qm, sizeof(ctx_pic->qm),
>> +                                      update_input_arguments);
>> +    if (!ret)
>> +        ff_h264_draw_horiz_band(h, sl, 0, h->avctx->height);
>> +
>> +    return ret;
>> +}
>> +
>> +static int d3d12va_h264_decode_init(AVCodecContext *avctx)
>> +{
>> +    D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
>> +
>> +    ctx->cfg.DecodeProfile = D3D12_VIDEO_DECODE_PROFILE_H264;
>> +
>> +    return ff_d3d12va_decode_init(avctx);
>> +}
>> +
>> +#if CONFIG_H264_D3D12VA_HWACCEL
>> +const AVHWAccel ff_h264_d3d12va_hwaccel = {
>> +    .name                 = "h264_d3d12va",
>> +    .type                 = AVMEDIA_TYPE_VIDEO,
>> +    .id                   = AV_CODEC_ID_H264,
>> +    .pix_fmt              = AV_PIX_FMT_D3D12,
>> +    .init                 = d3d12va_h264_decode_init,
>> +    .uninit               = ff_d3d12va_decode_uninit,
>> +    .start_frame          = d3d12va_h264_start_frame,
>> +    .decode_slice         = d3d12va_h264_decode_slice,
>> +    .end_frame            = d3d12va_h264_end_frame,
>> +    .frame_params         = ff_d3d12va_common_frame_params,
>> +    .frame_priv_data_size = sizeof(H264DecodePictureContext),
>> +    .priv_data_size       = sizeof(D3D12VADecodeContext),
>> +};
>> +#endif
>> diff --git a/libavcodec/dxva2.c b/libavcodec/dxva2.c
>> index 568d686f39..b22ea3e8f2 100644
>> --- a/libavcodec/dxva2.c
>> +++ b/libavcodec/dxva2.c
>> @@ -774,6 +774,10 @@ unsigned ff_dxva2_get_surface_index(const
>AVCodecContext
>> *avctx,
>>      void *surface = get_surface(avctx, frame);
>>      unsigned i;
>>
>> +#if CONFIG_D3D12VA
>> +    if (avctx->pix_fmt == AV_PIX_FMT_D3D12)
>> +        return (intptr_t)frame->data[1];
>> +#endif
>>  #if CONFIG_D3D11VA
>>      if (avctx->pix_fmt == AV_PIX_FMT_D3D11)
>>          return (intptr_t)frame->data[1];
>> @@ -1056,3 +1060,23 @@ int ff_dxva2_is_d3d11(const AVCodecContext
>*avctx)
>>      else
>>          return 0;
>>  }
>> +
>> +unsigned *ff_dxva2_get_report_id(const AVCodecContext *avctx,
>AVDXVAContext
>> *ctx)
>> +{
>> +    unsigned *report_id = NULL;
>> +
>> +#if CONFIG_D3D12VA
>> +    if (avctx->pix_fmt == AV_PIX_FMT_D3D12)
>> +        report_id = &ctx->d3d12va.report_id;
>> +#endif
>> +#if CONFIG_D3D11VA
>> +    if (ff_dxva2_is_d3d11(avctx))
>> +        report_id = &ctx->d3d11va.report_id;
>> +#endif
>> +#if CONFIG_DXVA2
>> +    if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD)
>> +        report_id = &ctx->dxva2.report_id;
>> +#endif
>> +
>> +    return report_id;
>> +}
>> diff --git a/libavcodec/dxva2.h b/libavcodec/dxva2.h
>> index 22c93992f2..bdec6112e9 100644
>> --- a/libavcodec/dxva2.h
>> +++ b/libavcodec/dxva2.h
>> @@ -45,9 +45,6 @@
>>   * @{
>>   */
>>
>> -#define FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG 1 ///< Work
>around for DXVA2
>> and old UVD/UVD+ ATI video cards
>> -#define FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO    2 ///< Work
>around for DXVA2
>> and old Intel GPUs with ClearVideo interface
>> -
>>  /**
>>   * This structure is used to provides the necessary configurations and data
>>   * to the DXVA2 FFmpeg HWAccel implementation.
>> diff --git a/libavcodec/dxva2_h264.c b/libavcodec/dxva2_h264.c
>> index 6300b1418d..7a076ea981 100644
>> --- a/libavcodec/dxva2_h264.c
>> +++ b/libavcodec/dxva2_h264.c
>> @@ -47,9 +47,10 @@ static void fill_picture_entry(DXVA_PicEntry_H264
>*pic,
>>      pic->bPicEntry = index | (flag << 7);
>>  }
>>
>> -static void fill_picture_parameters(const AVCodecContext *avctx,
>> AVDXVAContext *ctx, const H264Context *h,
>> +void ff_dxva2_h264_fill_picture_parameters(const AVCodecContext *avctx,
>> AVDXVAContext *ctx,
>>                                      DXVA_PicParams_H264 *pp)
>>  {
>> +    const H264Context *h               = avctx->priv_data;
>>      const H264Picture *current_picture = h->cur_pic_ptr;
>>      const SPS *sps = h->ps.sps;
>>      const PPS *pps = h->ps.pps;
>> @@ -163,9 +164,10 @@ static void fill_picture_parameters(const
>AVCodecContext
>> *avctx, AVDXVAContext *
>>      //pp->SliceGroupMap[810];               /* XXX not implemented by FFmpeg
>> */
>>  }
>>
>> -static void fill_scaling_lists(const AVCodecContext *avctx, AVDXVAContext
>> *ctx, const H264Context *h, DXVA_Qmatrix_H264 *qm)
>> +void ff_dxva2_h264_fill_scaling_lists(const AVCodecContext *avctx,
>> AVDXVAContext *ctx, DXVA_Qmatrix_H264 *qm)
>>  {
>> -    const PPS *pps = h->ps.pps;
>> +    const H264Context *h   = avctx->priv_data;
>> +    const PPS         *pps = h->ps.pps;
>>      unsigned i, j;
>>      memset(qm, 0, sizeof(*qm));
>>      if (DXVA_CONTEXT_WORKAROUND(avctx, ctx) &
>> FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG) {
>> @@ -453,10 +455,10 @@ static int
>dxva2_h264_start_frame(AVCodecContext *avctx,
>>      assert(ctx_pic);
>>
>>      /* Fill up DXVA_PicParams_H264 */
>> -    fill_picture_parameters(avctx, ctx, h, &ctx_pic->pp);
>> +    ff_dxva2_h264_fill_picture_parameters(avctx, ctx, &ctx_pic->pp);
>>
>>      /* Fill up DXVA_Qmatrix_H264 */
>> -    fill_scaling_lists(avctx, ctx, h, &ctx_pic->qm);
>> +    ff_dxva2_h264_fill_scaling_lists(avctx, ctx, &ctx_pic->qm);
>>
>>      ctx_pic->slice_count    = 0;
>>      ctx_pic->bitstream_size = 0;
>> diff --git a/libavcodec/dxva2_internal.h b/libavcodec/dxva2_internal.h
>> index b822af59cd..a9a1fc090e 100644
>> --- a/libavcodec/dxva2_internal.h
>> +++ b/libavcodec/dxva2_internal.h
>> @@ -26,18 +26,34 @@
>>  #define COBJMACROS
>>
>>  #include "config.h"
>> +#include "config_components.h"
>>
>>  /* define the proper COM entries before forcing desktop APIs */
>>  #include <objbase.h>
>>
>> +#define FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG 1 ///< Work
>around for
>> DXVA2/Direct3D11 and old UVD/UVD+ ATI video cards
>> +#define FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO    2 ///< Work
>around for
>> DXVA2/Direct3D11 and old Intel GPUs with ClearVideo interface
>> +
>>  #if CONFIG_DXVA2
>>  #include "dxva2.h"
>>  #include "libavutil/hwcontext_dxva2.h"
>> +#define DXVA2_VAR(ctx, var) ctx->dxva2.var
>> +#else
>> +#define DXVA2_VAR(ctx, var) 0
>>  #endif
>> +
>>  #if CONFIG_D3D11VA
>>  #include "d3d11va.h"
>>  #include "libavutil/hwcontext_d3d11va.h"
>> +#define D3D11VA_VAR(ctx, var) ctx->d3d11va.var
>> +#else
>> +#define D3D11VA_VAR(ctx, var) 0
>> +#endif
>> +
>> +#if CONFIG_D3D12VA
>> +#include "d3d12va.h"
>>  #endif
>> +
>>  #if HAVE_DXVA_H
>>  /* When targeting WINAPI_FAMILY_PHONE_APP or WINAPI_FAMILY_APP,
>dxva.h
>>   * defines nothing. Force the struct definitions to be visible. */
>> @@ -62,6 +78,9 @@ typedef union {
>>  #if CONFIG_DXVA2
>>      struct dxva_context      dxva2;
>>  #endif
>> +#if CONFIG_D3D12VA
>> +    struct D3D12VADecodeContext d3d12va;
>> +#endif
>>  } AVDXVAContext;
>>
>>  typedef struct FFDXVASharedContext {
>> @@ -101,39 +120,19 @@ typedef struct FFDXVASharedContext {
>>  #define D3D11VA_CONTEXT(ctx) (&ctx->d3d11va)
>>  #define DXVA2_CONTEXT(ctx)   (&ctx->dxva2)
>>
>> -#if CONFIG_D3D11VA && CONFIG_DXVA2
>> -#define DXVA_CONTEXT_WORKAROUND(avctx,
>ctx)     (ff_dxva2_is_d3d11(avctx) ?
>> ctx->d3d11va.workaround : ctx->dxva2.workaround)
>> -#define DXVA_CONTEXT_COUNT(avctx, ctx)          (ff_dxva2_is_d3d11(avctx) ?
>> ctx->d3d11va.surface_count : ctx->dxva2.surface_count)
>> -#define DXVA_CONTEXT_DECODER(avctx,
>ctx)        (ff_dxva2_is_d3d11(avctx) ?
>> (void *)ctx->d3d11va.decoder : (void *)ctx->dxva2.decoder)
>> -#define DXVA_CONTEXT_REPORT_ID(avctx,
>ctx)      (*(ff_dxva2_is_d3d11(avctx) ?
>> &ctx->d3d11va.report_id : &ctx->dxva2.report_id))
>> -#define DXVA_CONTEXT_CFG(avctx, ctx)            (ff_dxva2_is_d3d11(avctx) ?
>> (void *)ctx->d3d11va.cfg : (void *)ctx->dxva2.cfg)
>> -#define DXVA_CONTEXT_CFG_BITSTREAM(avctx,
>ctx)  (ff_dxva2_is_d3d11(avctx) ?
>> ctx->d3d11va.cfg->ConfigBitstreamRaw : ctx->dxva2.cfg-
>>ConfigBitstreamRaw)
>> -#define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx)
>(ff_dxva2_is_d3d11(avctx) ?
>> ctx->d3d11va.cfg->ConfigIntraResidUnsigned : ctx->dxva2.cfg-
>> >ConfigIntraResidUnsigned)
>> -#define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx)
>(ff_dxva2_is_d3d11(avctx) ?
>> ctx->d3d11va.cfg->ConfigResidDiffAccelerator : ctx->dxva2.cfg-
>> >ConfigResidDiffAccelerator)
>> +#define DXVA2_CONTEXT_VAR(avctx, ctx, var) (avctx->pix_fmt ==
>> AV_PIX_FMT_D3D12 ? 0 : (ff_dxva2_is_d3d11(avctx) ? D3D11VA_VAR(ctx,
>var) :
>> DXVA2_VAR(ctx, var)))
>> +
>> +#define DXVA_CONTEXT_REPORT_ID(avctx, ctx)
>> (*ff_dxva2_get_report_id(avctx, ctx))
>> +#define DXVA_CONTEXT_WORKAROUND(avctx,
>ctx)     DXVA2_CONTEXT_VAR(avctx, ctx,
>> workaround)
>> +#define DXVA_CONTEXT_COUNT(avctx,
>ctx)          DXVA2_CONTEXT_VAR(avctx, ctx,
>> surface_count)
>> +#define DXVA_CONTEXT_DECODER(avctx, ctx)        (avctx->pix_fmt ==
>> AV_PIX_FMT_D3D12 ? 0 : (ff_dxva2_is_d3d11(avctx) ? (void
>*)D3D11VA_VAR(ctx,
>> decoder) : (void *)DXVA2_VAR(ctx, decoder)))
>> +#define DXVA_CONTEXT_CFG(avctx, ctx)            (avctx->pix_fmt ==
>> AV_PIX_FMT_D3D12 ? 0 : (ff_dxva2_is_d3d11(avctx) ? (void
>*)D3D11VA_VAR(ctx,
>> cfg) : (void *)DXVA2_VAR(ctx, cfg)))
>> +#define DXVA_CONTEXT_CFG_BITSTREAM(avctx,
>ctx)  DXVA2_CONTEXT_VAR(avctx, ctx,
>> cfg->ConfigBitstreamRaw)
>> +#define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx)
>DXVA2_CONTEXT_VAR(avctx, ctx,
>> cfg->ConfigIntraResidUnsigned)
>> +#define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx)
>DXVA2_CONTEXT_VAR(avctx, ctx,
>> cfg->ConfigResidDiffAccelerator)
>>  #define DXVA_CONTEXT_VALID(avctx,
>ctx)          (DXVA_CONTEXT_DECODER(avctx,
>> ctx) && \
>>                                                   DXVA_CONTEXT_CFG(avctx,
>> ctx)     && \
>> -                                                 (ff_dxva2_is_d3d11(avctx) ||
>> ctx->dxva2.surface_count))
>> -#elif CONFIG_DXVA2
>> -#define DXVA_CONTEXT_WORKAROUND(avctx, ctx)     (ctx-
>>dxva2.workaround)
>> -#define DXVA_CONTEXT_COUNT(avctx, ctx)          (ctx->dxva2.surface_count)
>> -#define DXVA_CONTEXT_DECODER(avctx, ctx)        (ctx->dxva2.decoder)
>> -#define DXVA_CONTEXT_REPORT_ID(avctx, ctx)      (*(&ctx-
>>dxva2.report_id))
>> -#define DXVA_CONTEXT_CFG(avctx, ctx)            (ctx->dxva2.cfg)
>> -#define DXVA_CONTEXT_CFG_BITSTREAM(avctx, ctx)  (ctx->dxva2.cfg-
>> >ConfigBitstreamRaw)
>> -#define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx) (ctx->dxva2.cfg-
>> >ConfigIntraResidUnsigned)
>> -#define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx) (ctx->dxva2.cfg-
>> >ConfigResidDiffAccelerator)
>> -#define DXVA_CONTEXT_VALID(avctx, ctx)          (ctx->dxva2.decoder &&
>ctx-
>> >dxva2.cfg && ctx->dxva2.surface_count)
>> -#elif CONFIG_D3D11VA
>> -#define DXVA_CONTEXT_WORKAROUND(avctx, ctx)     (ctx-
>>d3d11va.workaround)
>> -#define DXVA_CONTEXT_COUNT(avctx, ctx)          (ctx-
>>d3d11va.surface_count)
>> -#define DXVA_CONTEXT_DECODER(avctx, ctx)        (ctx->d3d11va.decoder)
>> -#define DXVA_CONTEXT_REPORT_ID(avctx, ctx)      (*(&ctx-
>>d3d11va.report_id))
>> -#define DXVA_CONTEXT_CFG(avctx, ctx)            (ctx->d3d11va.cfg)
>> -#define DXVA_CONTEXT_CFG_BITSTREAM(avctx, ctx)  (ctx->d3d11va.cfg-
>> >ConfigBitstreamRaw)
>> -#define DXVA_CONTEXT_CFG_INTRARESID(avctx, ctx) (ctx->d3d11va.cfg-
>> >ConfigIntraResidUnsigned)
>> -#define DXVA_CONTEXT_CFG_RESIDACCEL(avctx, ctx) (ctx->d3d11va.cfg-
>> >ConfigResidDiffAccelerator)
>> -#define DXVA_CONTEXT_VALID(avctx, ctx)          (ctx->d3d11va.decoder &&
>ctx-
>> >d3d11va.cfg)
>> -#endif
>> +                                                 (ff_dxva2_is_d3d11(avctx) ||
>> DXVA2_VAR(ctx, surface_count)))
>>
>>  unsigned ff_dxva2_get_surface_index(const AVCodecContext *avctx,
>>                                      const AVDXVAContext *,
>> @@ -161,4 +160,10 @@ int
>ff_dxva2_common_frame_params(AVCodecContext *avctx,
>>
>>  int ff_dxva2_is_d3d11(const AVCodecContext *avctx);
>>
>> +unsigned *ff_dxva2_get_report_id(const AVCodecContext *avctx,
>AVDXVAContext
>> *ctx);
>> +
>> +void ff_dxva2_h264_fill_picture_parameters(const AVCodecContext *avctx,
>> AVDXVAContext *ctx, DXVA_PicParams_H264 *pp);
>> +
>> +void ff_dxva2_h264_fill_scaling_lists(const AVCodecContext *avctx,
>> AVDXVAContext *ctx, DXVA_Qmatrix_H264 *qm);
>> +
>>  #endif /* AVCODEC_DXVA2_INTERNAL_H */
>> diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
>> index 41bf30eefc..df70ad8a2f 100644
>> --- a/libavcodec/h264_slice.c
>> +++ b/libavcodec/h264_slice.c
>> @@ -778,6 +778,7 @@ static enum AVPixelFormat
>get_pixel_format(H264Context *h,
>> int force_callback)
>>  {
>>  #define HWACCEL_MAX (CONFIG_H264_DXVA2_HWACCEL + \
>>                       (CONFIG_H264_D3D11VA_HWACCEL * 2) + \
>> +                     CONFIG_H264_D3D12VA_HWACCEL + \
>>                       CONFIG_H264_NVDEC_HWACCEL + \
>>                       CONFIG_H264_VAAPI_HWACCEL + \
>>                       CONFIG_H264_VIDEOTOOLBOX_HWACCEL + \
>> @@ -883,6 +884,9 @@ static enum AVPixelFormat
>get_pixel_format(H264Context *h,
>> int force_callback)
>>              *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
>>              *fmt++ = AV_PIX_FMT_D3D11;
>>  #endif
>> +#if CONFIG_H264_D3D12VA_HWACCEL
>> +            *fmt++ = AV_PIX_FMT_D3D12;
>> +#endif
>>  #if CONFIG_H264_VAAPI_HWACCEL
>>              *fmt++ = AV_PIX_FMT_VAAPI;
>>  #endif
>> diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c
>> index 19f8dba131..853d3262f7 100644
>> --- a/libavcodec/h264dec.c
>> +++ b/libavcodec/h264dec.c
>> @@ -1089,6 +1089,9 @@ const FFCodec ff_h264_decoder = {
>>  #if CONFIG_H264_D3D11VA2_HWACCEL
>>                                 HWACCEL_D3D11VA2(h264),
>>  #endif
>> +#if CONFIG_H264_D3D12VA_HWACCEL
>> +                               HWACCEL_D3D12VA(h264),
>> +#endif
>>  #if CONFIG_H264_NVDEC_HWACCEL
>>                                 HWACCEL_NVDEC(h264),
>>  #endif
>> diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
>> index 48dfc17f72..be54604b81 100644
>> --- a/libavcodec/hwaccels.h
>> +++ b/libavcodec/hwaccels.h
>> @@ -32,6 +32,7 @@ extern const AVHWAccel ff_h263_vaapi_hwaccel;
>>  extern const AVHWAccel ff_h263_videotoolbox_hwaccel;
>>  extern const AVHWAccel ff_h264_d3d11va_hwaccel;
>>  extern const AVHWAccel ff_h264_d3d11va2_hwaccel;
>> +extern const AVHWAccel ff_h264_d3d12va_hwaccel;
>>  extern const AVHWAccel ff_h264_dxva2_hwaccel;
>>  extern const AVHWAccel ff_h264_nvdec_hwaccel;
>>  extern const AVHWAccel ff_h264_vaapi_hwaccel;
>> diff --git a/libavcodec/hwconfig.h b/libavcodec/hwconfig.h
>> index e8c6186151..e20118c096 100644
>> --- a/libavcodec/hwconfig.h
>> +++ b/libavcodec/hwconfig.h
>> @@ -82,6 +82,8 @@ void ff_hwaccel_uninit(AVCodecContext *avctx);
>>      HW_CONFIG_HWACCEL(1, 1, 1, VULKAN,       VULKAN,       ff_ ## codec ##
>> _vulkan_hwaccel)
>>  #define HWACCEL_D3D11VA(codec) \
>>      HW_CONFIG_HWACCEL(0, 0, 1, D3D11VA_VLD,  NONE,         ff_ ## codec
>##
>> _d3d11va_hwaccel)
>> +#define HWACCEL_D3D12VA(codec) \
>> +    HW_CONFIG_HWACCEL(1, 1, 0, D3D12,        D3D12VA,      ff_ ## codec ##
>> _d3d12va_hwaccel)
>>
>>  #define HW_CONFIG_ENCODER(device, frames, ad_hoc, format,
>device_type_) \
>>      &(const AVCodecHWConfigInternal) { \



More information about the ffmpeg-devel mailing list