[FFmpeg-devel] [PATCH v2 12/12] ffv1dec_vulkan: add a Vulkan compute-based hardware decoding implementation
Lynne
dev at lynne.ee
Mon Feb 24 10:05:09 EET 2025
This commit adds Vulkan-based accelerated decoding of FFv1.
Currently, arithmetic coding and bitstream versions 3 and 4 are supported; RGB is not.
---
configure | 2 +
libavcodec/Makefile | 1 +
libavcodec/ffv1.h | 1 +
libavcodec/ffv1dec.c | 20 +-
libavcodec/hwaccels.h | 1 +
libavcodec/vulkan/Makefile | 5 +
libavcodec/vulkan/ffv1_dec.comp | 168 +++++
libavcodec/vulkan/ffv1_dec_setup.comp | 113 +++
libavcodec/vulkan/rangecoder.comp | 74 ++
libavcodec/vulkan_decode.c | 6 +
libavcodec/vulkan_ffv1.c | 985 ++++++++++++++++++++++++++
11 files changed, 1373 insertions(+), 3 deletions(-)
create mode 100644 libavcodec/vulkan/ffv1_dec.comp
create mode 100644 libavcodec/vulkan/ffv1_dec_setup.comp
create mode 100644 libavcodec/vulkan_ffv1.c
diff --git a/configure b/configure
index f76f946dfe..fc007f3ef0 100755
--- a/configure
+++ b/configure
@@ -3193,6 +3193,8 @@ av1_videotoolbox_hwaccel_deps="videotoolbox"
av1_videotoolbox_hwaccel_select="av1_decoder"
av1_vulkan_hwaccel_deps="vulkan"
av1_vulkan_hwaccel_select="av1_decoder"
+ffv1_vulkan_hwaccel_deps="vulkan spirv_compiler"
+ffv1_vulkan_hwaccel_select="ffv1_decoder"
h263_vaapi_hwaccel_deps="vaapi"
h263_vaapi_hwaccel_select="h263_decoder"
h263_videotoolbox_hwaccel_deps="videotoolbox"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 0e96b33ef3..22bebd3096 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1017,6 +1017,7 @@ OBJS-$(CONFIG_AV1_VAAPI_HWACCEL) += vaapi_av1.o
OBJS-$(CONFIG_AV1_VDPAU_HWACCEL) += vdpau_av1.o
OBJS-$(CONFIG_AV1_VIDEOTOOLBOX_HWACCEL) += videotoolbox_av1.o
OBJS-$(CONFIG_AV1_VULKAN_HWACCEL) += vulkan_decode.o vulkan_av1.o
+OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan_decode.o ffv1_vulkan.o vulkan_ffv1.o
OBJS-$(CONFIG_H263_VAAPI_HWACCEL) += vaapi_mpeg4.o
OBJS-$(CONFIG_H263_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o
OBJS-$(CONFIG_H264_D3D11VA_HWACCEL) += dxva2_h264.o
diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
index 9af17326b3..5d0514f923 100644
--- a/libavcodec/ffv1.h
+++ b/libavcodec/ffv1.h
@@ -126,6 +126,7 @@ typedef struct FFV1Context {
void *hwaccel_picture_private, *hwaccel_last_picture_private;
uint32_t crcref;
enum AVPixelFormat pix_fmt;
+ enum AVPixelFormat configured_pix_fmt;
const AVFrame *cur_enc_frame;
int plane_count;
diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
index 5ab41da1b7..7940d86b25 100644
--- a/libavcodec/ffv1dec.c
+++ b/libavcodec/ffv1dec.c
@@ -341,6 +341,9 @@ static int decode_slice(AVCodecContext *c, void *arg)
static enum AVPixelFormat get_pixel_format(FFV1Context *f)
{
enum AVPixelFormat pix_fmts[] = {
+#if CONFIG_FFV1_VULKAN_HWACCEL
+ AV_PIX_FMT_VULKAN,
+#endif
f->pix_fmt,
AV_PIX_FMT_NONE,
};
@@ -360,9 +363,12 @@ static int read_header(FFV1Context *f, RangeCoder *c)
if (ret < 0)
return ret;
- f->avctx->pix_fmt = get_pixel_format(f);
- if (f->avctx->pix_fmt < 0)
- return AVERROR(EINVAL);
+ if (f->configured_pix_fmt != f->pix_fmt) {
+ f->avctx->pix_fmt = get_pixel_format(f);
+ if (f->avctx->pix_fmt < 0)
+ return AVERROR(EINVAL);
+ f->configured_pix_fmt = f->pix_fmt;
+ }
ff_dlog(f->avctx, "%d %d %d\n",
f->chroma_h_shift, f->chroma_v_shift, f->pix_fmt);
@@ -455,6 +461,9 @@ static av_cold int decode_init(AVCodecContext *avctx)
FFV1Context *f = avctx->priv_data;
int ret;
+ f->pix_fmt = AV_PIX_FMT_NONE;
+ f->configured_pix_fmt = AV_PIX_FMT_NONE;
+
f->pkt_ref = av_packet_alloc();
if (!f->pkt_ref)
return AVERROR(ENOMEM);
@@ -744,6 +753,7 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe,
ff_progress_frame_report(&f->picture, INT_MAX);
ff_progress_frame_unref(&f->last_picture);
+ av_refstruct_unref(&f->hwaccel_picture_private); // TODO: fixme
av_refstruct_unref(&f->hwaccel_last_picture_private);
if ((ret = av_frame_ref(rframe, f->picture.f)) < 0)
return ret;
@@ -773,6 +783,7 @@ static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
fdst->ac = fsrc->ac;
fdst->colorspace = fsrc->colorspace;
fdst->pix_fmt = fsrc->pix_fmt;
+ fdst->configured_pix_fmt = fsrc->configured_pix_fmt;
fdst->ec = fsrc->ec;
fdst->intra = fsrc->intra;
@@ -844,6 +855,9 @@ const FFCodec ff_ffv1_decoder = {
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP |
FF_CODEC_CAP_USES_PROGRESSFRAMES,
.hw_configs = (const AVCodecHWConfigInternal *const []) {
+#if CONFIG_FFV1_VULKAN_HWACCEL
+ HWACCEL_VULKAN(ffv1),
+#endif
NULL
},
};
diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
index 910a024032..0b2c725247 100644
--- a/libavcodec/hwaccels.h
+++ b/libavcodec/hwaccels.h
@@ -28,6 +28,7 @@ extern const struct FFHWAccel ff_av1_vaapi_hwaccel;
extern const struct FFHWAccel ff_av1_vdpau_hwaccel;
extern const struct FFHWAccel ff_av1_videotoolbox_hwaccel;
extern const struct FFHWAccel ff_av1_vulkan_hwaccel;
+extern const struct FFHWAccel ff_ffv1_vulkan_hwaccel;
extern const struct FFHWAccel ff_h263_vaapi_hwaccel;
extern const struct FFHWAccel ff_h263_videotoolbox_hwaccel;
extern const struct FFHWAccel ff_h264_d3d11va_hwaccel;
diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile
index 351332ee44..feb5d2ea51 100644
--- a/libavcodec/vulkan/Makefile
+++ b/libavcodec/vulkan/Makefile
@@ -11,6 +11,11 @@ OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += vulkan/common.o \
vulkan/ffv1_enc_vlc.o vulkan/ffv1_enc_ac.o \
vulkan/ffv1_enc.o vulkan/ffv1_enc_rgb.o
+OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/common.o \
+ vulkan/rangecoder.o vulkan/ffv1_vlc.o \
+ vulkan/ffv1_common.o vulkan/ffv1_reset.o \
+ vulkan/ffv1_dec_setup.o vulkan/ffv1_dec.o
+
VULKAN = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavcodec/vulkan/*.comp))
.SECONDARY: $(VULKAN:.comp=.c)
libavcodec/vulkan/%.c: TAG = VULKAN
diff --git a/libavcodec/vulkan/ffv1_dec.comp b/libavcodec/vulkan/ffv1_dec.comp
new file mode 100644
index 0000000000..3d7fdb3e38
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_dec.comp
@@ -0,0 +1,168 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev at lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+ivec2 get_pred(ivec2 pos, ivec2 off, int p, int comp, int sw, /* Gather the already-decoded neighbours of the sample at image position pos in dst[p]; off is the sample's offset inside the slice plane, sw the row width */
+ uint8_t context_model) /* Returns ivec2(context, prediction); neighbours that are not loaded stay 0 */
+{
+ const ivec2 yoff_border1 = off.x == 0 ? ivec2(1, -1) : ivec2(0, 0); /* first column: shifts a "one to the left" load to the same column, one row up */
+ const ivec2 yoff_border2 = off.x == 1 ? ivec2(1, -1) : ivec2(0, 0); /* second column: shifts the "two to the left" load to one column left, one row up */
+
+ TYPE top2 = TYPE(0); /* sample two rows above, only loaded from the third row on */
+ if (off.y > 1)
+ top2 = TYPE(imageLoad(dst[p], pos + ivec2(0, -2))[comp]);
+
+ VTYPE3 top = VTYPE3(TYPE(0), /* top[0] = upper-left, top[1] = upper, top[2] = upper-right */
+ TYPE(0),
+ TYPE(0));
+ if (off.y > 0 && off != ivec2(0, 1)) /* skipped at offset (0, 1), where the border shift would point two rows up */
+ top[0] = TYPE(imageLoad(dst[p], pos + ivec2(-1, -1) + yoff_border1)[comp]);
+ if (off.y > 0) {
+ top[1] = TYPE(imageLoad(dst[p], pos + ivec2(0, -1))[comp]);
+ top[2] = TYPE(imageLoad(dst[p], pos + ivec2(min(1, sw - off.x - 1), -1))[comp]); /* min() yields 0 in the last column, so upper-right falls back to upper */
+ }
+
+ VTYPE2 cur = VTYPE2(TYPE(0), /* cur[0] = two to the left, cur[1] = one to the left */
+ TYPE(0));
+ if (off.x > 0 && off != ivec2(1, 0)) /* skipped at offset (1, 0), where the border shift would leave the slice */
+ cur[0] = TYPE(imageLoad(dst[p], pos + ivec2(-2, 0) + yoff_border2)[comp]);
+ if (off != ivec2(0, 0)) /* the very first sample of the slice plane has no left neighbour */
+ cur[1] = TYPE(imageLoad(dst[p], pos + ivec2(-1, 0) + yoff_border1)[comp]);
+
+ /* context, prediction */
+ return ivec2(get_context(cur, top, top2, context_model),
+ predict(cur[1], VTYPE2(top))); /* VTYPE2(top) keeps top[0] and top[1] only */
+}
+
+#ifndef GOLOMB
+int get_isymbol(inout RangeCoder c, uint64_t state) /* Read one signed symbol with the range coder; state is the address of the context's state bytes. Returns 0 and sets corrupt when the exponent exceeds 31 */
+{
+ if (get_rac(c, state)) /* state byte 0: "symbol is zero" flag */
+ return 0;
+
+ state += 1;
+
+ int e = 0; /* exponent, coded in unary */
+ while (get_rac(c, state + min(e, 9))) { // state bytes 1..10 (relative to the context start)
+ e++;
+ if (e > 31) { /* more bits than an int can hold: flag the stream as corrupt */
+ corrupt = true;
+ return 0;
+ }
+ }
+
+ state += 21; /* state now points at byte 22 of the context */
+
+ int a = 1; /* implicit leading one, mantissa bits are appended MSB first */
+ for (int i = e - 1; i >= 0; i--)
+ a += a + int(get_rac(c, state + min(i, 9))); // state bytes 22..31
+
+ e = -int(get_rac(c, state - 11 + min(e, 10))); // state bytes 11..21: sign bit, e becomes 0 or -1
+ return (a ^ e) - e; /* negates a when e == -1, leaves it unchanged when e == 0 */
+}
+
+void decode_line_pcm(inout SliceContext sc, int y, int p, int comp, /* Decode row y of plane p as raw samples read bit by bit; comp is not used in this function */
+ int bits) /* bits: number of bits read per sample */
+{
+ ivec2 sp = sc.slice_pos;
+ int w = sc.slice_dim.x;
+ if (p > 0 && p < 3) { /* planes 1 and 2 use the chroma-shifted width and origin */
+ w >>= chroma_shift.x;
+ sp >>= chroma_shift;
+ }
+
+ for (int x = 0; x < w; x++) {
+ uint v = 0;
+ for (int i = (bits - 1); i >= 0; i--) /* most significant bit first */
+ v |= uint(get_rac_equi(sc.c)) << i;
+ imageStore(dst[p], (sp + ivec2(x, y)), uvec4(v)); /* the same value is written to all four components */
+ }
+}
+
+void decode_line(inout SliceContext sc, uint64_t state, /* Decode row y of plane p with context-modelled range coding; state is the address of this plane's context states */
+ int y, int p, int comp, int bits, const int run_index) /* run_index is not used in this function */
+{
+ ivec2 sp = sc.slice_pos;
+
+ int w = sc.slice_dim.x;
+ if (p > 0 && p < 3) { /* planes 1 and 2 use the chroma-shifted width and origin */
+ w >>= chroma_shift.x;
+ sp >>= chroma_shift;
+ }
+
+ for (int x = 0; x < w; x++) {
+ ivec2 pr = get_pred(sp + ivec2(x, y), ivec2(x, y), p, comp, w, /* pr[0] = context, pr[1] = prediction */
+ sc.quant_table_idx[p]);
+
+ int diff = get_isymbol(sc.c, state + CONTEXT_SIZE*abs(pr[0])); /* each context owns CONTEXT_SIZE state bytes, indexed by |context| */
+
+ if (pr[0] < 0) /* a negative context mirrors the sign of the residual */
+ diff = -diff;
+
+ int pix = zero_extend(pr[1] + diff, bits); /* presumably keeps the low `bits` bits - zero_extend is defined outside this file */
+ imageStore(dst[p], (sp + ivec2(x, y)), uvec4(pix));
+ }
+}
+#endif
+
+void decode_slice(inout SliceContext sc, const uint slice_idx) /* Decode every plane of one slice, row by row, into the dst images */
+{
+ int bits = bits_per_raw_sample;
+
+#ifndef GOLOMB
+ if (sc.slice_coding_mode == 1) { /* coding mode 1: samples are stored raw (see decode_line_pcm) */
+ for (int p = 0; p < planes; p++) {
+
+ int h = sc.slice_dim.y;
+ if (p > 0 && p < 3) /* planes 1 and 2 use the chroma-shifted height */
+ h >>= chroma_shift.y;
+
+ for (int y = 0; y < h; y++)
+ decode_line_pcm(sc, y, p, 0, bits);
+ }
+ } else
+#endif
+ {
+ uint64_t slice_state_off = uint64_t(slice_state) + /* start of this slice's context states inside the slice_state buffer */
+ slice_idx*plane_state_size*codec_planes;
+
+ for (int p = 0; p < planes; p++) {
+ int run_index = 0; /* passed through to decode_line, which does not use it */
+
+ int h = sc.slice_dim.y;
+ if (p > 0 && p < 3)
+ h >>= chroma_shift.y;
+
+ for (int y = 0; y < h; y++)
+ decode_line(sc, slice_state_off, y, p, 0, bits, run_index);
+
+ /* For the second chroma plane, reuse the first plane's state */
+ if (p != 1) /* not advancing after plane 1 makes plane 2 share plane 1's states */
+ slice_state_off += plane_state_size;
+ }
+ }
+}
+
+void main(void) /* Decode shader entry point: each workgroup decodes the slice selected by its workgroup ID */
+{
+ const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; /* row-major index of this workgroup in the dispatch grid */
+ decode_slice(slice_ctx[slice_idx], slice_idx);
+}
diff --git a/libavcodec/vulkan/ffv1_dec_setup.comp b/libavcodec/vulkan/ffv1_dec_setup.comp
new file mode 100644
index 0000000000..11c367efee
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_dec_setup.comp
@@ -0,0 +1,113 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev at lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+uint get_usymbol(inout RangeCoder c, uint64_t state) /* Read one unsigned symbol with the range coder; state is the address of the context's state bytes. Returns 0 and sets corrupt when the exponent exceeds 31 */
+{
+ if (get_rac(c, state + 0)) /* state byte 0: "symbol is zero" flag */
+ return 0;
+
+ int e = 0; /* exponent, coded in unary */
+ while (get_rac(c, state + 1 + min(e, 9))) { // state bytes 1..10
+ e++;
+ if (e > 31) { /* more bits than a uint can hold: flag the stream as corrupt */
+ corrupt = true;
+ return 0;
+ }
+ }
+
+ uint a = 1; /* implicit leading one, mantissa bits are appended MSB first */
+ for (int i = e - 1; i >= 0; i--)
+ a += a + uint(get_rac(c, state + 22 + min(i, 9))); // state bytes 22..31
+
+ return a;
+}
+
+bool decode_slice_header(inout SliceContext sc, uint64_t state) /* Parse one slice header into sc, using the CONTEXT_SIZE scratch state bytes at address state; returns true when the header is invalid, false on success */
+{
+ u8buf sb = u8buf(state);
+
+ [[unroll]]
+ for (int i = 0; i < CONTEXT_SIZE; i++)
+ sb[i].v = uint8_t(128); /* reset every header context byte to 128 before reading */
+
+ uint sx = get_usymbol(sc.c, state); /* slice origin; checked against the dispatch grid below */
+ uint sy = get_usymbol(sc.c, state);
+ uint sw = get_usymbol(sc.c, state) + 1; /* extent is coded minus one; the +1 wraps to 0 for a 0xFFFFFFFF symbol */
+ uint sh = get_usymbol(sc.c, state) + 1;
+
+ if (sw == 0 || sh == 0 || sw > gl_NumWorkGroups.x || sh > gl_NumWorkGroups.y || /* all operands are unsigned: bound sw/sh first so the subtractions below cannot wrap around */
+ sx > (gl_NumWorkGroups.x - sw) || sy > (gl_NumWorkGroups.y - sh) ||
+ corrupt) {
+ return true;
+ }
+
+ /* Set coordinates */
+ uint sxs = slice_coord(img_size.x, sx , gl_NumWorkGroups.x, chroma_shift.x);
+ uint sxe = slice_coord(img_size.x, sx + sw, gl_NumWorkGroups.x, chroma_shift.x);
+ uint sys = slice_coord(img_size.y, sy , gl_NumWorkGroups.y, chroma_shift.y);
+ uint sye = slice_coord(img_size.y, sy + sh, gl_NumWorkGroups.y, chroma_shift.y);
+
+ sc.slice_pos = ivec2(sxs, sys);
+ sc.slice_dim = ivec2(sxe - sxs, sye - sys);
+ sc.slice_rct_coef = ivec2(1, 1); /* defaults, overwritten below only for version >= 4 */
+ sc.slice_coding_mode = int(0);
+
+ for (uint i = 0; i < codec_planes; i++) { /* one quantisation table index per coded plane */
+ uint idx = get_usymbol(sc.c, state);
+ if (idx >= quant_table_count)
+ return true;
+ sc.quant_table_idx[i] = uint8_t(idx);
+ sc.context_count = context_count[idx]; /* overwritten on each iteration, so the last plane's count is kept */
+ }
+
+ get_usymbol(sc.c, state); /* three header symbols are read and discarded; their values are not used here */
+ get_usymbol(sc.c, state);
+ get_usymbol(sc.c, state);
+
+ if (version >= 4) {
+ sc.slice_reset_contexts = get_rac(sc.c, state);
+ sc.slice_coding_mode = get_usymbol(sc.c, state);
+ if (sc.slice_coding_mode != 1 && colorspace == 1) { /* RCT coefficients are only read for colorspace 1 when the coding mode is not 1 */
+ sc.slice_rct_coef.x = int(get_usymbol(sc.c, state));
+ sc.slice_rct_coef.y = int(get_usymbol(sc.c, state));
+ if (sc.slice_rct_coef.x + sc.slice_rct_coef.y > 4)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void main(void) /* Setup shader entry point: each workgroup initialises the range decoder of one slice and parses its header */
+{
+ const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; /* row-major index of this workgroup in the dispatch grid */
+ uint64_t scratch_state = uint64_t(scratch_data) + slice_idx*CONTEXT_SIZE; /* CONTEXT_SIZE scratch bytes per slice for the header contexts */
+
+ rac_init_dec(slice_ctx[slice_idx].c,
+ u8buf(slice_data + slice_offsets[slice_idx]),
+ slice_offsets[slice_idx + 1] - slice_offsets[slice_idx]); /* slice size is the difference of two consecutive offsets */
+
+ if (slice_idx == (gl_NumWorkGroups.x*gl_NumWorkGroups.y - 1)) /* only the last slice index consumes one extra bit before its header; NOTE(review): presumably a frame-level flag - confirm against the bitstream layout */
+ get_rac_equi(slice_ctx[slice_idx].c);
+
+ decode_slice_header(slice_ctx[slice_idx], scratch_state); /* NOTE(review): the "invalid header" return value is discarded here */
+}
diff --git a/libavcodec/vulkan/rangecoder.comp b/libavcodec/vulkan/rangecoder.comp
index 6e3b9c1238..8c8d0d9d9c 100644
--- a/libavcodec/vulkan/rangecoder.comp
+++ b/libavcodec/vulkan/rangecoder.comp
@@ -191,3 +191,77 @@ void rac_init(out RangeCoder r, u8buf data, uint buf_size)
r.outstanding_count = uint16_t(0);
r.outstanding_byte = uint8_t(0xFF);
}
+
+/* Decoder */
+uint overread; /* number of refills attempted after the end of the slice data (see refill) */
+bool corrupt; /* cleared by rac_init_dec; decoder-side error flag */
+
+void rac_init_dec(out RangeCoder r, u8buf data, uint buf_size) /* Initialise r for decoding buf_size bytes starting at data, and clear the overread/corrupt flags */
+{
+ overread = 0;
+ corrupt = false;
+
+ /* Skip priming bytes */
+ rac_init(r, OFFBUF(u8buf, data, 2), buf_size - 2); /* NOTE(review): buf_size - 2 wraps for buf_size < 2 - confirm callers never pass that */
+
+ u8vec2 prime = u8vec2buf(data).v; /* the first two bytes of the slice seed `low` */
+ /* Switch endianness of the priming bytes */
+ r.low = pack16(prime.yx);
+
+ if (r.low >= 0xFF00) { /* clamp low and end the stream right after the priming bytes, so refill() reads no payload */
+ r.low = 0xFF00;
+ r.bytestream_end = uint64_t(data) + 2;
+ }
+}
+
+void refill(inout RangeCoder c) /* Renormalise: scale range and low by 256 and append the next input byte to low */
+{
+ c.range <<= 8;
+ c.low <<= 8;
+ if (c.bytestream < c.bytestream_end) {
+ c.low += u8buf(c.bytestream).v;
+ c.bytestream++;
+ } else {
+ overread++; /* past the end of the data: nothing is added to low, the overread is only counted */
+ }
+}
+
+bool get_rac(inout RangeCoder c, uint64_t state) /* Decode one bit with the adaptive state byte stored at address state, and update that byte */
+{
+ u8buf sb = u8buf(state);
+ uint val = uint(sb.v); /* current state value */
+ uint16_t range1 = uint16_t((uint(c.range) * val) >> 8); /* sub-range assigned to a 1 bit: range * state / 256 */
+
+ c.range -= range1; /* remaining sub-range belongs to a 0 bit */
+
+ bool bit = c.low >= c.range;
+ sb.v = zero_one_state[(uint(bit) << 8) + val]; /* table lookup of the next state: entries 0..255 for a 0 bit, 256..511 for a 1 bit */
+
+ if (bit) {
+ c.low -= c.range;
+ c.range = range1;
+ }
+
+ if (c.range < 0x100) /* range dropped below 8 bits: pull in another byte */
+ refill(c);
+
+ return bit;
+}
+
+bool get_rac_equi(inout RangeCoder c) /* Decode one bit by splitting the range in half; no state byte is read or updated */
+{
+ uint16_t range1 = c.range >> 1; /* half of the range is assigned to a 1 bit */
+
+ c.range -= range1;
+
+ bool bit = c.low >= c.range;
+ if (bit) {
+ c.low -= c.range;
+ c.range = range1;
+ }
+
+ if (c.range < 0x100) /* range dropped below 8 bits: pull in another byte */
+ refill(c);
+
+ return bit;
+}
diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index 9eaafa6495..b910834566 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -36,6 +36,9 @@ extern const FFVulkanDecodeDescriptor ff_vk_dec_hevc_desc;
#if CONFIG_AV1_VULKAN_HWACCEL
extern const FFVulkanDecodeDescriptor ff_vk_dec_av1_desc;
#endif
+#if CONFIG_FFV1_VULKAN_HWACCEL
+extern const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc;
+#endif
static const FFVulkanDecodeDescriptor *dec_descs[] = {
#if CONFIG_H264_VULKAN_HWACCEL
@@ -47,6 +50,9 @@ static const FFVulkanDecodeDescriptor *dec_descs[] = {
#if CONFIG_AV1_VULKAN_HWACCEL
&ff_vk_dec_av1_desc,
#endif
+#if CONFIG_FFV1_VULKAN_HWACCEL
+ &ff_vk_dec_ffv1_desc,
+#endif
};
static const FFVulkanDecodeDescriptor *get_codecdesc(enum AVCodecID codec_id)
diff --git a/libavcodec/vulkan_ffv1.c b/libavcodec/vulkan_ffv1.c
new file mode 100644
index 0000000000..6356f3163c
--- /dev/null
+++ b/libavcodec/vulkan_ffv1.c
@@ -0,0 +1,985 @@
+/*
+ * Copyright (c) 2024 Lynne <dev at lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "vulkan_decode.h"
+#include "hwaccel_internal.h"
+
+#include "ffv1.h"
+#include "ffv1_vulkan.h"
+#include "libavutil/vulkan_spirv.h"
+#include "libavutil/mem.h"
+
+extern const char *ff_source_common_comp;
+extern const char *ff_source_rangecoder_comp;
+extern const char *ff_source_ffv1_vlc_comp;
+extern const char *ff_source_ffv1_common_comp;
+extern const char *ff_source_ffv1_dec_setup_comp;
+extern const char *ff_source_ffv1_reset_comp;
+extern const char *ff_source_ffv1_dec_comp;
+extern const char *ff_source_ffv1_dec_rct_comp;
+
+const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc = { /* Descriptor listed in vulkan_decode.c's dec_descs[] for the FFv1 hwaccel */
+ .codec_id = AV_CODEC_ID_FFV1,
+ .decode_extension = FF_VK_EXT_PUSH_DESCRIPTOR,
+ .queue_flags = VK_QUEUE_COMPUTE_BIT, /* compute queue flag, not a video-decode one */
+};
+
+typedef struct FFv1VulkanDecodePicture { /* Per-picture hwaccel state, reached through FFV1Context.hwaccel_picture_private */
+ FFVulkanDecodePicture vp; /* generic Vulkan decode picture; owns slices_buf and the output image views */
+
+ VkImageView img_view_rct; /* NOTE(review): not referenced in the visible part of this file */
+ AVFrame *rct; /* NOTE(review): not referenced in the visible part of this file */
+
+ AVBufferRef *tmp_data; /* scratch buffer, max_slice_count*CONTEXT_SIZE bytes, passed to the shaders as scratch_data */
+
+ AVBufferRef *slice_state; /* newly allocated on key frames, otherwise a new reference to the previous picture's buffer */
+ uint32_t plane_state_size; /* bytes of context state per plane (state size * max_context_count) */
+ uint32_t slice_state_size; /* per-slice bytes: plane states of all planes plus slice_data_size, aligned to 8 */
+ uint32_t slice_data_size; /* bytes reserved per slice for the shader-side SliceContext */
+ uint32_t max_context_count; /* largest context_count over all quantisation tables */
+
+ AVBufferRef *slice_offset_buf; /* host-visible array of (max_slice_count + 1) 32-bit slice offsets */
+ uint32_t *slice_offset; /* offsets array filled in by ff_vk_decode_add_slice() */
+ int slice_num; /* number of slices added so far for this picture */
+} FFv1VulkanDecodePicture;
+
+typedef struct FFv1VulkanDecodeContext { /* Decoder-wide state shared by all pictures, reached through FFVulkanDecodeShared.sd_ctx */
+ AVBufferRef *intermediate_frames_ref; /* NOTE(review): not referenced in the visible part of this file */
+
+ FFVulkanShader setup; /* slice header parsing shader */
+ FFVulkanShader reset[2]; /* AC/Golomb; indexed by (f->ac == AC_GOLOMB_RICE) */
+ FFVulkanShader decode[2][2][2]; /* 16/32 bit, AC/Golomb, Normal/RGB; indexed [use32bit][ac == AC_GOLOMB_RICE][is_rgb] */
+ FFVulkanShader rct[2]; /* 16/32 bit */
+
+ FFVkBuffer rangecoder_static_buf; /* presumably backs the "rangecoder_static_buf" descriptor (zero_one_state table) - confirm in init code */
+ FFVkBuffer quant_buf; /* presumably backs the "quant_buf" descriptor (quant_table) - confirm in init code */
+ FFVkBuffer crc_tab_buf; /* presumably backs the CRC table descriptor - confirm in init code */
+
+ AVBufferPool *slice_state_pool; /* pool for FFv1VulkanDecodePicture.slice_state */
+ AVBufferPool *tmp_data_pool; /* pool for FFv1VulkanDecodePicture.tmp_data */
+ AVBufferPool *slice_offset_pool; /* pool for FFv1VulkanDecodePicture.slice_offset_buf */
+} FFv1VulkanDecodeContext;
+
+typedef struct FFv1VkResetParameters { /* Push constants uploaded to the reset shader in vk_ffv1_end_frame() */
+ VkDeviceAddress slice_state; /* device address of the context states (past the per-slice SliceContext area) */
+ uint32_t plane_state_size; /* bytes of context state per plane */
+ uint32_t context_count; /* filled with the picture's max_context_count */
+ uint8_t codec_planes; /* f->plane_count */
+ uint8_t key_frame; /* non-zero when the picture has AV_FRAME_FLAG_KEY set */
+ uint8_t version;
+ uint8_t micro_version;
+ uint8_t padding[1]; /* explicit padding; presumably keeps the size in step with the shader-side block - confirm */
+} FFv1VkResetParameters;
+
+typedef struct FFv1VkParameters { /* Push constants for the setup and decode shaders; field order mirrors the GLSL block emitted by add_push_data() */
+ uint32_t context_count[MAX_QUANT_TABLES]; /* copied from f->context_count[] */
+
+ VkDeviceAddress slice_data; /* device address of the uploaded slice bitstream */
+ VkDeviceAddress slice_state; /* device address of the context states (past the per-slice SliceContext area) */
+ VkDeviceAddress scratch_data; /* device address of the per-slice scratch states */
+
+ uint32_t img_size[2]; /* frame width, height */
+ uint32_t chroma_shift[2]; /* horizontal, vertical chroma shift */
+
+ uint32_t plane_state_size; /* bytes of context state per plane */
+ uint32_t crcref;
+
+ uint8_t bits_per_raw_sample;
+ uint8_t quant_table_count;
+ uint8_t version;
+ uint8_t micro_version;
+ uint8_t key_frame; /* non-zero when the picture has AV_FRAME_FLAG_KEY set */
+ uint8_t planes; /* plane count of the software pixel format */
+ uint8_t codec_planes; /* f->plane_count */
+ uint8_t transparency;
+ uint8_t colorspace;
+ uint8_t ec;
+ uint8_t padding[2]; /* explicit padding, also declared in the GLSL block */
+} FFv1VkParameters;
+
+static void add_push_data(FFVulkanShader *shd) /* Append the GLSL push-constant block matching FFv1VkParameters to shd's source and register its size */
+{
+ GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
+ GLSLF(1, uint context_count[%i]; ,MAX_QUANT_TABLES);
+ GLSLC(0, );
+ GLSLC(1, u8buf slice_data; );
+ GLSLC(1, u8buf slice_state; );
+ GLSLC(1, u8buf scratch_data; );
+ GLSLC(0, );
+ GLSLC(1, uvec2 img_size; );
+ GLSLC(1, uvec2 chroma_shift; );
+ GLSLC(0, );
+ GLSLC(1, uint plane_state_size; );
+ GLSLC(1, uint32_t crcref; );
+ GLSLC(0, );
+ GLSLC(1, uint8_t bits_per_raw_sample; );
+ GLSLC(1, uint8_t quant_table_count; );
+ GLSLC(1, uint8_t version; );
+ GLSLC(1, uint8_t micro_version; );
+ GLSLC(1, uint8_t key_frame; );
+ GLSLC(1, uint8_t planes; );
+ GLSLC(1, uint8_t codec_planes; );
+ GLSLC(1, uint8_t transparency; );
+ GLSLC(1, uint8_t colorspace; );
+ GLSLC(1, uint8_t ec; );
+ GLSLC(1, uint8_t padding[2]; );
+ GLSLC(0, }; );
+ ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkParameters), /* the C struct's size is used, so the GLSL block above must stay in sync with it */
+ VK_SHADER_STAGE_COMPUTE_BIT);
+}
+
+static int vk_ffv1_start_frame(AVCodecContext *avctx, /* hwaccel start_frame: size and allocate the per-picture GPU buffers; returns 0 or a negative error code */
+ av_unused const uint8_t *buffer,
+ av_unused uint32_t size)
+{
+ int err;
+ FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+ FFVulkanDecodeShared *ctx = dec->shared_ctx;
+ FFv1VulkanDecodeContext *fv = ctx->sd_ctx;
+ FFV1Context *f = avctx->priv_data;
+
+ FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private;
+ FFVulkanDecodePicture *vp = &fp->vp;
+ FFVkBuffer *slice_offset;
+
+ fp->slice_num = 0;
+
+ for (int i = 0; i < f->quant_table_count; i++) /* NOTE(review): max_context_count is not reset here; assumes fp starts out zeroed - confirm */
+ fp->max_context_count = FFMAX(f->context_count[i], fp->max_context_count);
+
+ /* Allocate slice buffer data */
+ if (f->ac == AC_GOLOMB_RICE)
+ fp->plane_state_size = 8; /* bytes per context in Golomb-Rice mode */
+ else
+ fp->plane_state_size = CONTEXT_SIZE; /* bytes per context in range-coder mode */
+
+ fp->plane_state_size *= fp->max_context_count;
+ fp->slice_state_size = fp->plane_state_size*f->plane_count;
+
+ fp->slice_data_size = 256; /* Overestimation for the SliceContext struct */
+ fp->slice_state_size += fp->slice_data_size;
+ fp->slice_state_size = FFALIGN(fp->slice_state_size, 8);
+
+#if 0
+ /* Host map the input slices data */
+ err = ff_vk_host_map_buffer(&ctx->s, &vp->slices_buf, f->pkt_ref->buf,
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+ VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
+ if (err < 0)
+ return err;
+#endif
+
+ /* Allocate slice state data */
+ if (f->picture.f->flags & AV_FRAME_FLAG_KEY) { /* key frames get a fresh state buffer */
+ err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_state_pool,
+ &fp->slice_state,
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+ VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+ NULL, f->max_slice_count*fp->slice_state_size,
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+ if (err < 0)
+ return err;
+ } else { /* other frames continue from the previous picture's states */
+ FFv1VulkanDecodePicture *fpl = f->hwaccel_last_picture_private; /* NOTE(review): dereferenced without a NULL check - confirm the decoder guarantees a previous picture here */
+ fp->slice_state = av_buffer_ref(fpl->slice_state);
+ if (!fp->slice_state)
+ return AVERROR(ENOMEM);
+ }
+
+ /* Allocate temporary data buffer */
+ err = ff_vk_get_pooled_buffer(&ctx->s, &fv->tmp_data_pool,
+ &fp->tmp_data,
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+ VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+ NULL, f->max_slice_count*CONTEXT_SIZE, /* CONTEXT_SIZE scratch bytes per slice */
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+ if (err < 0)
+ return err;
+
+ /* Allocate slice offsets buffer */
+ err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_offset_pool,
+ &fp->slice_offset_buf,
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+ VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+ NULL, (f->max_slice_count + 1)*sizeof(uint32_t), /* one extra entry: slice i spans offsets[i]..offsets[i + 1] */
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); /* host-visible because the CPU writes the offsets through mapped_mem */
+ if (err < 0)
+ return err;
+
+ /* First offset is always 0 */
+ slice_offset = (FFVkBuffer *)fp->slice_offset_buf->data;
+ AV_WN32(slice_offset->mapped_mem, 0);
+
+ /* Prepare frame to be used */
+ err = ff_vk_decode_prepare_frame_sdr(dec, f->picture.f, vp, 1,
+ FF_VK_REP_NATIVE, 0);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static int vk_ffv1_decode_slice(AVCodecContext *avctx, /* hwaccel decode_slice: queue one slice's bytes and record its end offset; returns 0 or a negative error code */
+ const uint8_t *data,
+ uint32_t size)
+{
+ FFV1Context *f = avctx->priv_data;
+
+ FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private;
+ FFVulkanDecodePicture *vp = &fp->vp;
+ FFVkBuffer *slice_offset = (FFVkBuffer *)fp->slice_offset_buf->data;
+
+#if 1 /* active path: slice bytes are handed to ff_vk_decode_add_slice() */
+ int err = ff_vk_decode_add_slice(avctx, vp, data, size, 0,
+ &fp->slice_num,
+ (const uint32_t **)&fp->slice_offset);
+ if (err < 0)
+ return err;
+
+ AV_WN32(slice_offset->mapped_mem + fp->slice_num*sizeof(uint32_t), /* entry slice_num receives the end offset of the slice just added (its start offset + size) */
+ fp->slice_offset[fp->slice_num - 1] + size);
+#else /* disabled path: matches the disabled host-mapping code in vk_ffv1_start_frame() */
+ FFVkBuffer *slices_buf = (FFVkBuffer *)vp->slices_buf->data;
+
+ fp->slice_num++;
+ AV_WN32(slice_offset->mapped_mem + fp->slice_num*sizeof(uint32_t),
+ data - slices_buf->mapped_mem + size);
+#endif
+
+ return 0;
+}
+
+static int vk_ffv1_end_frame(AVCodecContext *avctx)
+{
+ int err;
+ FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+ FFVulkanDecodeShared *ctx = dec->shared_ctx;
+ FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+ FFV1Context *f = avctx->priv_data;
+ FFv1VulkanDecodeContext *fv = ctx->sd_ctx;
+ FFv1VkParameters pd;
+ FFv1VkResetParameters pd_reset;
+
+ int is_rgb = !(f->colorspace == 0 && avctx->sw_pix_fmt != AV_PIX_FMT_YA8) &&
+ !(avctx->sw_pix_fmt == AV_PIX_FMT_YA8);
+
+ FFVulkanShader *reset_shader;
+ FFVulkanShader *decode_shader;
+
+ FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private;
+ FFVulkanDecodePicture *vp = &fp->vp;
+
+ FFVkBuffer *slices_buf = (FFVkBuffer *)vp->slices_buf->data;
+ FFVkBuffer *slice_state = (FFVkBuffer *)fp->slice_state->data;
+ FFVkBuffer *slice_offset = (FFVkBuffer *)fp->slice_offset_buf->data;
+
+ FFVkBuffer *tmp_data = (FFVkBuffer *)fp->tmp_data->data;
+
+ VkImageMemoryBarrier2 img_bar[37];
+ int nb_img_bar = 0;
+ VkBufferMemoryBarrier2 buf_bar[8];
+ int nb_buf_bar = 0;
+
+ FFVkExecContext *exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool);
+ ff_vk_exec_start(&ctx->s, exec);
+
+ /* Prepare deps */
+ RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, f->picture.f,
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
+
+ RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_state, 1, 1));
+ RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->slices_buf, 1, 0));
+ vp->slices_buf = NULL;
+ RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_offset_buf, 1, 0));
+ fp->slice_offset_buf = NULL;
+ RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->tmp_data, 1, 0));
+ fp->tmp_data = NULL;
+
+ /* Input frame barrier */
+ ff_vk_frame_barrier(&ctx->s, exec, f->picture.f, img_bar, &nb_img_bar,
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+ VK_ACCESS_SHADER_WRITE_BIT,
+ VK_IMAGE_LAYOUT_GENERAL,
+ VK_QUEUE_FAMILY_IGNORED);
+
+ /* Entry barrier */
+ if (!(f->picture.f->flags & AV_FRAME_FLAG_KEY)) {
+ buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+ .srcStageMask = slice_state->stage,
+ .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+ .srcAccessMask = slice_state->access,
+ .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .buffer = slice_state->buf,
+ .offset = 0,
+ .size = VK_WHOLE_SIZE,
+ };
+ }
+
+ vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+ .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+ .pImageMemoryBarriers = img_bar,
+ .imageMemoryBarrierCount = nb_img_bar,
+ .pBufferMemoryBarriers = buf_bar,
+ .bufferMemoryBarrierCount = nb_buf_bar,
+ });
+ nb_img_bar = 0;
+ if (nb_buf_bar) {
+ slice_state->stage = buf_bar[1].dstStageMask;
+ slice_state->access = buf_bar[1].dstAccessMask;
+ nb_buf_bar = 0;
+ }
+
+ /* Setup shader */
+ ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup,
+ 1, 0, 0,
+ slice_state,
+ 0, fp->slice_data_size*f->slice_count,
+ VK_FORMAT_UNDEFINED);
+ ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup,
+ 1, 1, 0,
+ slice_offset,
+ 0, (f->slice_count + 1)*sizeof(uint32_t),
+ VK_FORMAT_UNDEFINED);
+
+ ff_vk_exec_bind_shader(&ctx->s, exec, &fv->setup);
+ pd = (FFv1VkParameters) {
+ /* context_count */
+
+ .slice_data = slices_buf->address,
+ .slice_state = slice_state->address + f->slice_count*fp->slice_data_size,
+ .scratch_data = tmp_data->address,
+
+ .img_size[0] = f->picture.f->width,
+ .img_size[1] = f->picture.f->height,
+ .chroma_shift[0] = f->chroma_h_shift,
+ .chroma_shift[1] = f->chroma_v_shift,
+
+ .plane_state_size = fp->plane_state_size,
+ .crcref = f->crcref,
+
+ .bits_per_raw_sample = avctx->bits_per_raw_sample,
+ .quant_table_count = f->quant_table_count,
+ .version = f->version,
+ .micro_version = f->micro_version,
+ .key_frame = f->picture.f->flags & AV_FRAME_FLAG_KEY,
+ .planes = av_pix_fmt_count_planes(avctx->sw_pix_fmt),
+ .codec_planes = f->plane_count,
+ .transparency = f->transparency,
+ .colorspace = f->colorspace,
+ .ec = f->ec,
+ };
+ for (int i = 0; i < MAX_QUANT_TABLES; i++)
+ pd.context_count[i] = f->context_count[i];
+
+ ff_vk_shader_update_push_const(&ctx->s, exec, &fv->setup,
+ VK_SHADER_STAGE_COMPUTE_BIT,
+ 0, sizeof(pd), &pd);
+
+ vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1);
+
+ /* Reset shader */
+ reset_shader = &fv->reset[f->ac == AC_GOLOMB_RICE];
+ ff_vk_shader_update_desc_buffer(&ctx->s, exec, reset_shader,
+ 1, 0, 0,
+ slice_state,
+ 0, fp->slice_data_size*f->slice_count,
+ VK_FORMAT_UNDEFINED);
+
+ ff_vk_exec_bind_shader(&ctx->s, exec, reset_shader);
+
+ pd_reset = (FFv1VkResetParameters) {
+ .slice_state = slice_state->address + f->slice_count*fp->slice_data_size,
+ .plane_state_size = fp->plane_state_size,
+ .context_count = fp->max_context_count,
+ .codec_planes = f->plane_count,
+ .key_frame = f->picture.f->flags & AV_FRAME_FLAG_KEY,
+ .version = f->version,
+ .micro_version = f->micro_version,
+ };
+ ff_vk_shader_update_push_const(&ctx->s, exec, reset_shader,
+ VK_SHADER_STAGE_COMPUTE_BIT,
+ 0, sizeof(pd_reset), &pd_reset);
+
+ /* Sync between setup and reset shaders */
+ buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+ .srcStageMask = slice_state->stage,
+ .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+ .srcAccessMask = slice_state->access,
+ .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
+ VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .buffer = slice_state->buf,
+ .offset = 0,
+ .size = fp->slice_data_size*f->slice_count,
+ };
+ vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+ .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+ .pBufferMemoryBarriers = buf_bar,
+ .bufferMemoryBarrierCount = nb_buf_bar,
+ });
+ slice_state->stage = buf_bar[0].dstStageMask;
+ slice_state->access = buf_bar[0].dstAccessMask;
+ nb_buf_bar = 0;
+
+ vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices,
+ f->plane_count);
+
+ /* Decode */
+ decode_shader = &fv->decode[f->use32bit][f->ac == AC_GOLOMB_RICE][is_rgb];
+ ff_vk_shader_update_desc_buffer(&ctx->s, exec, decode_shader,
+ 1, 0, 0,
+ slice_state,
+ 0, fp->slice_data_size*f->slice_count,
+ VK_FORMAT_UNDEFINED);
+ ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader,
+ f->picture.f, vp->view.out,
+ 1, 1,
+ VK_IMAGE_LAYOUT_GENERAL,
+ VK_NULL_HANDLE);
+
+ ff_vk_exec_bind_shader(&ctx->s, exec, decode_shader);
+ ff_vk_shader_update_push_const(&ctx->s, exec, decode_shader,
+ VK_SHADER_STAGE_COMPUTE_BIT,
+ 0, sizeof(pd), &pd);
+
+ /* Sync between reset and decode shaders */
+ buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+ .srcStageMask = slice_state->stage,
+ .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+ .srcAccessMask = slice_state->access,
+ .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
+ VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .buffer = slice_state->buf,
+ .offset = fp->slice_data_size*f->slice_count,
+ .size = slice_state->size - fp->slice_data_size*f->slice_count,
+ };
+ vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+ .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+ .pBufferMemoryBarriers = buf_bar,
+ .bufferMemoryBarrierCount = nb_buf_bar,
+ });
+ slice_state->stage = buf_bar[0].dstStageMask;
+ slice_state->access = buf_bar[0].dstAccessMask;
+ nb_buf_bar = 0;
+
+ vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1);
+
+ err = ff_vk_exec_submit(&ctx->s, exec);
+ if (err < 0)
+ return err;
+
+fail:
+ return 0;
+}
+
+/*
+ * Write the GLSL preamble shared by the FFv1 decoder shaders into shd->src:
+ * the CONTEXT_SIZE and MAX_QUANT_TABLE_MASK defines, the sample type
+ * aliases (TYPE, VTYPE2, VTYPE3), followed by ff_source_rangecoder_comp
+ * and ff_source_ffv1_common_comp.
+ *
+ * use32bit: non-zero selects 32-bit sample types, zero selects 16-bit ones.
+ */
+static void define_shared_code(FFVulkanShader *shd, int use32bit)
+{
+    int sample_bits = 16;
+
+    if (use32bit)
+        sample_bits = 32;
+
+    av_bprintf(&shd->src, "#define CONTEXT_SIZE %i\n" ,CONTEXT_SIZE);
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_MASK 0x%x\n" ,MAX_QUANT_TABLE_MASK);
+
+    /* Sample type aliases used by the shader sources pulled in below */
+    GLSLF(0, #define TYPE int%i_t ,sample_bits);
+    GLSLF(0, #define VTYPE2 i%ivec2 ,sample_bits);
+    GLSLF(0, #define VTYPE3 i%ivec3 ,sample_bits);
+
+    GLSLD(ff_source_rangecoder_comp);
+    GLSLD(ff_source_ffv1_common_comp);
+}
+
+/*
+ * Build, compile, link and register the "ffv1_dec_setup" compute shader.
+ *
+ * f:    decoder context; only max_slice_count is read, to size the
+ *       per-slice buffer arrays
+ * s:    Vulkan context
+ * pool: execution pool the shader is registered with
+ * spv:  SPIR-V compiler used to compile the generated GLSL
+ * shd:  shader to initialize
+ *
+ * Returns the result of the last step on success, or the negative error
+ * code of the first failing step.
+ */
+static int init_setup_shader(FFV1Context *f, FFVulkanContext *s,
+                             FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
+                             FFVulkanShader *shd)
+{
+    int err;
+    void *spv_opaque = NULL;
+    uint8_t *spv_data;
+    size_t spv_len;
+
+    const char *glsl_exts[] = {
+        "GL_EXT_buffer_reference",
+        "GL_EXT_buffer_reference2",
+    };
+
+    /* Constant tables (added with 3, 1, 0 below) */
+    FFVulkanDescriptorSetBinding const_set[] = {
+        {
+            .name = "rangecoder_static_buf",
+            .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout = "scalar",
+            .buf_content = "uint8_t zero_one_state[512];",
+        },
+        {
+            .name = "quant_buf",
+            .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout = "scalar",
+            .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
+                           "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
+        },
+        {
+            .name = "crc_ieee_buf",
+            .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout = "scalar",
+            .buf_content = "uint32_t crc_ieee[256];",
+        },
+    };
+
+    /* Per-slice buffers (added with 2, 0, 0 below) */
+    FFVulkanDescriptorSetBinding slice_set[] = {
+        {
+            .name = "slice_data_buf",
+            .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+            .buf_content = "SliceContext slice_ctx",
+            .buf_elems = f->max_slice_count,
+        },
+        {
+            .name = "slice_offsets_buf",
+            .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_quali = "readonly",
+            .buf_content = "uint32_t slice_offsets",
+            .buf_elems = f->max_slice_count + 1,
+        },
+    };
+
+    RET(ff_vk_shader_init(s, shd, "ffv1_dec_setup",
+                          VK_SHADER_STAGE_COMPUTE_BIT,
+                          glsl_exts, 2,
+                          1, 1, 1,
+                          0));
+
+    /* Common codec header */
+    GLSLD(ff_source_common_comp);
+
+    add_push_data(shd);
+
+    /* Table dimensions referenced by the quant_buf declaration */
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
+    av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
+
+    RET(ff_vk_shader_add_descriptor_set(s, shd, const_set, 3, 1, 0));
+
+    /* The sample bit depth does not matter for the setup shader */
+    define_shared_code(shd, 0);
+
+    RET(ff_vk_shader_add_descriptor_set(s, shd, slice_set, 2, 0, 0));
+
+    GLSLD(ff_source_ffv1_dec_setup_comp);
+
+    RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
+                            &spv_opaque));
+    RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
+
+    RET(ff_vk_shader_register_exec(s, pool, shd));
+
+fail:
+    /* Release the compiler's per-shader state on both success and failure */
+    if (spv_opaque)
+        spv->free_shader(spv, &spv_opaque);
+
+    return err;
+}
+
+/*
+ * Build, compile, link and register the "ffv1_dec_reset" compute shader.
+ *
+ * f:    decoder context; only max_slice_count is read
+ * s:    Vulkan context; also supplies the device's workgroup size limit
+ * pool: execution pool the shader is registered with
+ * spv:  SPIR-V compiler used to compile the generated GLSL
+ * shd:  shader to initialize
+ * ac:   coder type; AC_GOLOMB_RICE enables the Golomb defines and pulls in
+ *       the VLC shader source
+ *
+ * Returns the result of the last step on success, or the negative error
+ * code of the first failing step.
+ */
+static int init_reset_shader(FFV1Context *f, FFVulkanContext *s,
+                             FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
+                             FFVulkanShader *shd, int ac)
+{
+    int err;
+    void *spv_opaque = NULL;
+    uint8_t *spv_data;
+    size_t spv_len;
+
+    /* Workgroup width: the device limit on X, capped at 1024 */
+    int wg_dim = FFMIN(s->props.properties.limits.maxComputeWorkGroupSize[0], 1024);
+
+    const char *glsl_exts[] = {
+        "GL_EXT_buffer_reference",
+        "GL_EXT_buffer_reference2",
+    };
+
+    /* Constant tables (added with 2, 1, 0 below) */
+    FFVulkanDescriptorSetBinding const_set[] = {
+        {
+            .name = "rangecoder_static_buf",
+            .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout = "scalar",
+            .buf_content = "uint8_t zero_one_state[512];",
+        },
+        {
+            .name = "quant_buf",
+            .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout = "scalar",
+            .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
+                           "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
+        },
+    };
+
+    /* Slice contexts, read-only in this shader (added with 1, 0, 0 below) */
+    FFVulkanDescriptorSetBinding slice_set[] = {
+        {
+            .name = "slice_data_buf",
+            .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .mem_quali = "readonly",
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+            .buf_content = "SliceContext slice_ctx",
+            .buf_elems = f->max_slice_count,
+        },
+    };
+
+    RET(ff_vk_shader_init(s, shd, "ffv1_dec_reset",
+                          VK_SHADER_STAGE_COMPUTE_BIT,
+                          glsl_exts, 2,
+                          wg_dim, 1, 1,
+                          0));
+
+    if (ac == AC_GOLOMB_RICE) {
+        av_bprintf(&shd->src, "#define PB_UNALIGNED\n");
+        av_bprintf(&shd->src, "#define GOLOMB\n");
+    }
+
+    /* Common codec header */
+    GLSLD(ff_source_common_comp);
+
+    /* Push constants; the size registered below is sizeof(FFv1VkResetParameters) */
+    GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
+    GLSLC(1, u8buf slice_state; );
+    GLSLC(1, uint plane_state_size; );
+    GLSLC(1, uint context_count; );
+    GLSLC(1, uint8_t codec_planes; );
+    GLSLC(1, uint8_t key_frame; );
+    GLSLC(1, uint8_t version; );
+    GLSLC(1, uint8_t micro_version; );
+    GLSLC(1, uint8_t padding[1]; );
+    GLSLC(0, }; );
+    ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters),
+                                VK_SHADER_STAGE_COMPUTE_BIT);
+
+    /* Table dimensions referenced by the quant_buf declaration */
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
+    av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
+
+    RET(ff_vk_shader_add_descriptor_set(s, shd, const_set, 2, 1, 0));
+
+    /* Bit depth irrelevant for the reset shader */
+    define_shared_code(shd, 0);
+    if (ac == AC_GOLOMB_RICE)
+        GLSLD(ff_source_ffv1_vlc_comp);
+
+    RET(ff_vk_shader_add_descriptor_set(s, shd, slice_set, 1, 0, 0));
+
+    GLSLD(ff_source_ffv1_reset_comp);
+
+    RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
+                            &spv_opaque));
+    RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
+
+    RET(ff_vk_shader_register_exec(s, pool, shd));
+
+fail:
+    /* Release the compiler's per-shader state on both success and failure */
+    if (spv_opaque)
+        spv->free_shader(spv, &spv_opaque);
+
+    return err;
+}
+
+/*
+ * Build, compile, link and register one variant of the "ffv1_dec" compute
+ * shader.
+ *
+ * f:          decoder context; only max_slice_count is read
+ * s:          Vulkan context
+ * pool:       execution pool the shader is registered with
+ * spv:        SPIR-V compiler used to compile the generated GLSL
+ * shd:        shader to initialize
+ * frames_ctx: frames context whose sw_format determines the layout and
+ *             element count of the "dst" storage image binding
+ * use32bit:   forwarded to define_shared_code() to pick the sample width
+ * ac:         coder type; AC_GOLOMB_RICE enables the Golomb defines and
+ *             pulls in the VLC shader source
+ * rgb:        not referenced by this function
+ *
+ * Returns the result of the last step on success, or the negative error
+ * code of the first failing step.
+ */
+static int init_decode_shader(FFV1Context *f, FFVulkanContext *s,
+                              FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
+                              FFVulkanShader *shd, AVHWFramesContext *frames_ctx,
+                              int use32bit, int ac, int rgb)
+{
+    int err;
+    const int golomb = (ac == AC_GOLOMB_RICE);
+    FFVulkanDescriptorSetBinding *bindings;
+    void *spv_opaque = NULL;
+    uint8_t *spv_data;
+    size_t spv_len;
+
+    const char *glsl_exts[] = {
+        "GL_EXT_buffer_reference",
+        "GL_EXT_buffer_reference2",
+    };
+
+    RET(ff_vk_shader_init(s, shd, "ffv1_dec",
+                          VK_SHADER_STAGE_COMPUTE_BIT,
+                          glsl_exts, 2,
+                          1, 1, 1,
+                          0));
+
+    if (golomb) {
+        av_bprintf(&shd->src, "#define PB_UNALIGNED\n");
+        av_bprintf(&shd->src, "#define GOLOMB\n");
+    }
+
+    /* Common codec header */
+    GLSLD(ff_source_common_comp);
+
+    add_push_data(shd);
+
+    /* Table dimensions referenced by the quant_buf declaration */
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
+    av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
+
+    /* Constant tables */
+    bindings = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name = "rangecoder_static_buf",
+            .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout = "scalar",
+            .buf_content = "uint8_t zero_one_state[512];",
+        },
+        {
+            .name = "quant_buf",
+            .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout = "scalar",
+            .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
+                           "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
+        },
+        {
+            .name = "crc_ieee_buf",
+            .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout = "scalar",
+            .buf_content = "uint32_t crc_ieee[256];",
+        },
+    };
+    RET(ff_vk_shader_add_descriptor_set(s, shd, bindings, 3, 1, 0));
+
+    define_shared_code(shd, use32bit);
+    if (golomb)
+        GLSLD(ff_source_ffv1_vlc_comp);
+
+    /* Slice contexts plus the output image planes */
+    bindings = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name = "slice_data_buf",
+            .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+            .buf_content = "SliceContext slice_ctx",
+            .buf_elems = f->max_slice_count,
+        },
+        {
+            .name = "dst",
+            .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .dimensions = 2,
+            .mem_layout = ff_vk_shader_rep_fmt(frames_ctx->sw_format,
+                                               FF_VK_REP_NATIVE),
+            .elems = av_pix_fmt_count_planes(frames_ctx->sw_format),
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
+    };
+    RET(ff_vk_shader_add_descriptor_set(s, shd, bindings, 2, 0, 0));
+
+    GLSLD(ff_source_ffv1_dec_comp);
+
+    RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
+                            &spv_opaque));
+    RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
+
+    RET(ff_vk_shader_register_exec(s, pool, shd));
+
+fail:
+    /* Release the compiler's per-shader state on both success and failure */
+    if (spv_opaque)
+        spv->free_shader(spv, &spv_opaque);
+
+    return err;
+}
+
+/*
+ * Hwaccel init callback for the Vulkan FFv1 decoder.
+ *
+ * Creates a SPIR-V compiler, initializes the common Vulkan decode state,
+ * allocates the FFv1-specific context (stored in the shared context's
+ * sd_ctx), builds the setup, reset and decode shaders, uploads the range
+ * coder, quantization and CRC tables, and binds those tables to the setup
+ * and decode shaders.
+ *
+ * The SPIR-V compiler is only used while the shaders are being built, so
+ * every path taken after it has been created goes through `fail`, where it
+ * is released again.
+ *
+ * Returns a non-negative value on success, a negative error code otherwise.
+ * Objects created before a failure are not torn down here.
+ * NOTE(review): confirm that the caller runs the uninit path after a failed
+ * init, otherwise partially created shaders/buffers need freeing here.
+ */
+static int vk_decode_ffv1_init(AVCodecContext *avctx)
+{
+    int err;
+    FFV1Context *f = avctx->priv_data;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = NULL;
+    FFv1VulkanDecodeContext *fv;
+    FFVkSPIRVCompiler *spv;
+
+    spv = ff_vk_spirv_init();
+    if (!spv) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    RET(ff_vk_decode_init(avctx));
+    ctx = dec->shared_ctx;
+
+    fv = ctx->sd_ctx = av_mallocz(sizeof(*fv));
+    if (!fv) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    /* Setup shader */
+    RET(init_setup_shader(f, &ctx->s, &ctx->exec_pool, spv, &fv->setup));
+
+    /* Reset shaders */
+    for (int i = 0; i < 2; i++) { /* AC/Golomb */
+        RET(init_reset_shader(f, &ctx->s, &ctx->exec_pool,
+                              spv, &fv->reset[i],
+                              !i ? AC_RANGE_CUSTOM_TAB : 0));
+    }
+
+    /* Decode shaders; only the first AC/Golomb and Normal/RGB variants are built */
+    for (int i = 0; i < 2; i++) { /* 16/32 bit */
+        for (int j = 0; j < 1; j++) { /* AC/Golomb */
+            for (int k = 0; k < 1; k++) { /* Normal/RGB */
+                AVHWFramesContext *frames_ctx;
+                frames_ctx = k ? (AVHWFramesContext *)fv->intermediate_frames_ref->data :
+                                 (AVHWFramesContext *)avctx->hw_frames_ctx->data;
+
+                RET(init_decode_shader(f, &ctx->s, &ctx->exec_pool,
+                                       spv, &fv->decode[i][j][k],
+                                       frames_ctx,
+                                       i,
+                                       !j ? AC_RANGE_CUSTOM_TAB : 0,
+                                       k));
+            }
+        }
+    }
+
+    /* Range coder data */
+    RET(ff_ffv1_vk_init_state_transition_data(&ctx->s,
+                                              &fv->rangecoder_static_buf,
+                                              f));
+
+    /* Quantization table data */
+    RET(ff_ffv1_vk_init_quant_table_data(&ctx->s,
+                                         &fv->quant_buf,
+                                         f));
+
+    /* CRC table buffer */
+    RET(ff_ffv1_vk_init_crc_table_data(&ctx->s,
+                                       &fv->crc_tab_buf,
+                                       f));
+
+    /* Update setup global descriptors */
+    RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                        &fv->setup, 0, 0, 0,
+                                        &fv->rangecoder_static_buf,
+                                        0, fv->rangecoder_static_buf.size,
+                                        VK_FORMAT_UNDEFINED));
+
+    /* Update decode global descriptors */
+    for (int i = 0; i < 2; i++) { /* 16/32 bit */
+        for (int j = 0; j < 1; j++) { /* AC/Golomb */
+            for (int k = 0; k < 1; k++) { /* Normal/RGB */
+                RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                                    &fv->decode[i][j][k], 0, 0, 0,
+                                                    &fv->rangecoder_static_buf,
+                                                    0, fv->rangecoder_static_buf.size,
+                                                    VK_FORMAT_UNDEFINED));
+                RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                                    &fv->decode[i][j][k], 0, 1, 0,
+                                                    &fv->quant_buf,
+                                                    0, fv->quant_buf.size,
+                                                    VK_FORMAT_UNDEFINED));
+                RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                                    &fv->decode[i][j][k], 0, 2, 0,
+                                                    &fv->crc_tab_buf,
+                                                    0, fv->crc_tab_buf.size,
+                                                    VK_FORMAT_UNDEFINED));
+            }
+        }
+    }
+
+fail:
+    /* Reached on success and on failure: the compiler is no longer needed */
+    spv->uninit(&spv);
+
+    return err;
+}
+
+/*
+ * Destructor for the per-frame hwaccel private data.
+ *
+ * _hwctx: opaque carrying the AVHWDeviceContext
+ * data:   the FFv1VulkanDecodePicture being released
+ */
+static void vk_ffv1_free_frame_priv(AVRefStructOpaque _hwctx, void *data)
+{
+    FFv1VulkanDecodePicture *pic = data;
+    AVHWDeviceContext *dev_ctx = _hwctx.nc;
+
+    /* Common Vulkan decode picture state first */
+    ff_vk_decode_free_frame(dev_ctx, &pic->vp);
+
+    /* Drop the references to the per-frame buffers */
+    av_buffer_unref(&pic->vp.slices_buf);
+    av_buffer_unref(&pic->slice_state);
+    av_buffer_unref(&pic->slice_offset_buf);
+    av_buffer_unref(&pic->tmp_data);
+
+    /*
+     * TODO: the image view pic->img_view_rct is not destroyed here; the
+     * vk->DestroyImageView(dev_ctx->act_dev, pic->img_view_rct,
+     * dev_ctx->alloc) call is still disabled.
+     */
+
+    av_frame_free(&pic->rct);
+}
+
+/*
+ * Hwaccel uninit callback for the Vulkan FFv1 decoder.
+ *
+ * Runs the common Vulkan decode teardown, then frees the setup, reset,
+ * decode and RCT shaders, the three buffer pools and the three constant
+ * table buffers held in the FFv1 decode context. Always returns 0.
+ *
+ * NOTE(review): ctx and fv are dereferenced after ff_vk_decode_uninit()
+ * has returned. If that call drops the last reference to dec->shared_ctx,
+ * everything below reads freed memory - confirm the ownership rules.
+ * NOTE(review): fv itself (allocated with av_mallocz() in the init
+ * callback) is not freed in this function - confirm it is released
+ * elsewhere.
+ */
+static int vk_decode_ffv1_uninit(AVCodecContext *avctx)
+{
+ FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+ FFVulkanDecodeShared *ctx = dec->shared_ctx;
+ FFv1VulkanDecodeContext *fv = ctx->sd_ctx;
+
+ ff_vk_decode_uninit(avctx);
+
+ ff_vk_shader_free(&ctx->s, &fv->setup);
+
+ for (int i = 0; i < 2; i++) /* AC/Golomb */
+ ff_vk_shader_free(&ctx->s, &fv->reset[i]);
+
+ /* Same loop bounds as in the init callback: one AC/Golomb, one Normal/RGB variant */
+ for (int i = 0; i < 2; i++) /* 16/32 bit */
+ for (int j = 0; j < 1; j++) /* AC/Golomb */
+ for (int k = 0; k < 1; k++) /* Normal/RGB */
+ ff_vk_shader_free(&ctx->s, &fv->decode[i][j][k]);
+
+ /* NOTE(review): the init callback does not build fv->rct[]; presumably
+ * ff_vk_shader_free() accepts a zero-initialized shader - confirm */
+ for (int i = 0; i < 2; i++) /* 16/32 bit */
+ ff_vk_shader_free(&ctx->s, &fv->rct[i]);
+
+ av_buffer_pool_uninit(&fv->tmp_data_pool);
+ av_buffer_pool_uninit(&fv->slice_state_pool);
+ av_buffer_pool_uninit(&fv->slice_offset_pool);
+
+ ff_vk_free_buf(&ctx->s, &fv->quant_buf);
+ ff_vk_free_buf(&ctx->s, &fv->rangecoder_static_buf);
+ ff_vk_free_buf(&ctx->s, &fv->crc_tab_buf);
+
+ return 0;
+}
+
+/*
+ * Hwaccel descriptor for Vulkan FFv1 decoding (AV_CODEC_ID_FFV1 to
+ * AV_PIX_FMT_VULKAN frames). Frame start/slice/end, frame-private cleanup,
+ * init and uninit are FFv1-specific; thread-context update, parameter
+ * invalidation, flush and frame parameters use the shared ff_vk_* helpers.
+ */
+const FFHWAccel ff_ffv1_vulkan_hwaccel = {
+ .p.name = "ffv1_vulkan",
+ .p.type = AVMEDIA_TYPE_VIDEO,
+ .p.id = AV_CODEC_ID_FFV1,
+ .p.pix_fmt = AV_PIX_FMT_VULKAN,
+ .start_frame = &vk_ffv1_start_frame,
+ .decode_slice = &vk_ffv1_decode_slice,
+ .end_frame = &vk_ffv1_end_frame,
+ .free_frame_priv = &vk_ffv1_free_frame_priv,
+ .frame_priv_data_size = sizeof(FFv1VulkanDecodePicture),
+ .init = &vk_decode_ffv1_init,
+ .update_thread_context = &ff_vk_update_thread_context,
+ .decode_params = &ff_vk_params_invalidate,
+ .flush = &ff_vk_decode_flush,
+ .uninit = &vk_decode_ffv1_uninit,
+ .frame_params = &ff_vk_frame_params,
+ .priv_data_size = sizeof(FFVulkanDecodeContext),
+ /* Only the async-safe capability is advertised; thread-safe is left disabled */
+ .caps_internal = HWACCEL_CAP_ASYNC_SAFE /* | HWACCEL_CAP_THREAD_SAFE */,
+};
--
2.47.2
More information about the ffmpeg-devel
mailing list