29 #define RGB_LINECACHE 2
41 .queue_flags = VK_QUEUE_COMPUTE_BIT,
108 GLSLC(0,
layout(push_constant, scalar) uniform pushConstants { );
109 GLSLC(1, u8buf slice_data; );
110 GLSLC(1, u8buf slice_state; );
112 GLSLC(1, ivec4 fmt_lut; );
113 GLSLC(1, uvec2 img_size; );
114 GLSLC(1, uvec2 chroma_shift; );
116 GLSLC(1, uint plane_state_size; );
117 GLSLC(1, uint32_t crcref; );
118 GLSLC(1,
int rct_offset; );
120 GLSLC(1, uint8_t extend_lookup[8]; );
121 GLSLC(1, uint8_t bits_per_raw_sample; );
122 GLSLC(1, uint8_t quant_table_count; );
124 GLSLC(1, uint8_t micro_version; );
125 GLSLC(1, uint8_t key_frame; );
127 GLSLC(1, uint8_t codec_planes; );
128 GLSLC(1, uint8_t color_planes; );
129 GLSLC(1, uint8_t transparency; );
130 GLSLC(1, uint8_t planar_rgb; );
131 GLSLC(1, uint8_t colorspace; );
132 GLSLC(1, uint8_t ec; );
133 GLSLC(1, uint8_t golomb; );
134 GLSLC(1, uint8_t check_crc; );
135 GLSLC(1, uint8_t padding[3]; );
138 VK_SHADER_STAGE_COMPUTE_BIT);
165 for (
int i = 0;
i <
f->quant_table_count;
i++)
166 max_contexts =
FFMAX(
f->context_count[
i], max_contexts);
187 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
188 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
194 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
195 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
197 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
210 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
211 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
212 NULL, 2*
f->slice_count*
sizeof(uint32_t),
213 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
214 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
221 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
222 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
223 NULL, 2*
f->slice_count*
sizeof(uint32_t),
224 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
225 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
262 if (slices_buf && slices_buf->host_ref) {
264 data - slices_buf->mapped_mem);
300 int bits =
f->avctx->bits_per_raw_sample > 0 ?
f->avctx->bits_per_raw_sample : 8;
319 VkImageView *decode_dst_view = is_rgb ? rct_image_views : vp->
view.
out;
321 VkImageMemoryBarrier2 img_bar[37];
323 VkBufferMemoryBarrier2 buf_bar[8];
331 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
332 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
343 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
344 VK_PIPELINE_STAGE_2_CLEAR_BIT));
346 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
347 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
348 VK_ACCESS_2_TRANSFER_WRITE_BIT,
349 VK_IMAGE_LAYOUT_GENERAL,
350 VK_QUEUE_FAMILY_IGNORED);
359 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT));
370 buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
371 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
372 .srcStageMask = slice_state->stage,
373 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
374 .srcAccessMask = slice_state->access,
375 .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
376 VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
377 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
378 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
379 .buffer = slice_state->buf,
384 vk->CmdPipelineBarrier2(exec->
buf, &(VkDependencyInfo) {
385 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
386 .pImageMemoryBarriers = img_bar,
387 .imageMemoryBarrierCount = nb_img_bar,
388 .pBufferMemoryBarriers = buf_bar,
389 .bufferMemoryBarrierCount = nb_buf_bar,
391 slice_state->stage = buf_bar[0].dstStageMask;
392 slice_state->access = buf_bar[0].dstAccessMask;
401 VK_FORMAT_UNDEFINED);
405 0, 2*
f->slice_count*
sizeof(uint32_t),
406 VK_FORMAT_UNDEFINED);
410 0, 2*
f->slice_count*
sizeof(uint32_t),
411 VK_FORMAT_UNDEFINED);
418 .img_size[0] =
f->picture.f->width,
419 .img_size[1] =
f->picture.f->height,
420 .chroma_shift[0] =
f->chroma_h_shift,
421 .chroma_shift[1] =
f->chroma_v_shift,
425 .rct_offset = 1 <<
bits,
427 .bits_per_raw_sample =
bits,
428 .quant_table_count =
f->quant_table_count,
429 .version =
f->version,
430 .micro_version =
f->micro_version,
433 .codec_planes =
f->plane_count,
434 .color_planes = color_planes,
435 .transparency =
f->transparency,
438 .colorspace =
f->colorspace,
443 for (
int i = 0;
i <
f->quant_table_count;
i++)
445 (
f->quant_tables[
i][4][127] != 0);
451 memcpy(pd.
fmt_lut, (
int [4]) { 2, 1, 0, 3 }, 4*
sizeof(
int));
456 VK_SHADER_STAGE_COMPUTE_BIT,
459 vk->CmdDispatch(exec->
buf,
f->num_h_slices,
f->num_v_slices, 1);
463 for (
int i = 0;
i < color_planes;
i++)
464 vk->CmdClearColorImage(exec->
buf, vkf->
img[
i], VK_IMAGE_LAYOUT_GENERAL,
465 &((VkClearColorValue) { 0 }),
466 1, &((VkImageSubresourceRange) {
467 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
474 reset_shader = &fv->
reset;
479 VK_FORMAT_UNDEFINED);
486 .codec_planes =
f->plane_count,
488 .version =
f->version,
489 .micro_version =
f->micro_version,
491 for (
int i = 0;
i <
f->quant_table_count;
i++)
495 VK_SHADER_STAGE_COMPUTE_BIT,
496 0,
sizeof(pd_reset), &pd_reset);
499 buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
500 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
501 .srcStageMask = slice_state->stage,
502 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
503 .srcAccessMask = slice_state->access,
504 .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT,
505 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
506 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
507 .buffer = slice_state->buf,
511 vk->CmdPipelineBarrier2(exec->
buf, &(VkDependencyInfo) {
512 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
513 .pImageMemoryBarriers = img_bar,
514 .imageMemoryBarrierCount = nb_img_bar,
515 .pBufferMemoryBarriers = buf_bar,
516 .bufferMemoryBarrierCount = nb_buf_bar,
518 slice_state->stage = buf_bar[0].dstStageMask;
519 slice_state->access = buf_bar[0].dstAccessMask;
523 vk->CmdDispatch(exec->
buf,
f->num_h_slices,
f->num_v_slices,
527 decode_shader = &fv->
decode;
532 VK_FORMAT_UNDEFINED);
534 decode_dst, decode_dst_view,
536 VK_IMAGE_LAYOUT_GENERAL,
541 0, 2*
f->slice_count*
sizeof(uint32_t),
542 VK_FORMAT_UNDEFINED);
547 VK_IMAGE_LAYOUT_GENERAL,
552 VK_SHADER_STAGE_COMPUTE_BIT,
556 buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
557 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
558 .srcStageMask = slice_state->stage,
559 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
560 .srcAccessMask = slice_state->access,
561 .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
562 VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
563 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
564 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
565 .buffer = slice_state->buf,
572 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
573 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
574 VK_ACCESS_SHADER_WRITE_BIT |
575 (!is_rgb ? VK_ACCESS_SHADER_READ_BIT : 0),
576 VK_IMAGE_LAYOUT_GENERAL,
577 VK_QUEUE_FAMILY_IGNORED);
580 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
581 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
582 VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
583 VK_IMAGE_LAYOUT_GENERAL,
584 VK_QUEUE_FAMILY_IGNORED);
586 vk->CmdPipelineBarrier2(exec->
buf, &(VkDependencyInfo) {
587 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
588 .pImageMemoryBarriers = img_bar,
589 .imageMemoryBarrierCount = nb_img_bar,
590 .pBufferMemoryBarriers = buf_bar,
591 .bufferMemoryBarrierCount = nb_buf_bar,
593 slice_state->stage = buf_bar[0].dstStageMask;
594 slice_state->access = buf_bar[0].dstAccessMask;
598 vk->CmdDispatch(exec->
buf,
f->num_h_slices,
f->num_v_slices, 1);
613 int smp_bits = use32bit ? 32 : 16;
621 GLSLF(0, #define
TYPE int%i_t ,smp_bits);
622 GLSLF(0, #define VTYPE2
i%ivec2 ,smp_bits);
623 GLSLF(0, #define VTYPE3
i%ivec3 ,smp_bits);
637 void *spv_opaque =
NULL;
640 VK_SHADER_STAGE_COMPUTE_BIT,
641 (
const char *[]) {
"GL_EXT_buffer_reference",
642 "GL_EXT_buffer_reference2" }, 2,
657 .
name =
"rangecoder_static_buf",
658 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
659 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
660 .mem_layout =
"scalar",
661 .buf_content =
"uint8_t zero_one_state[512];",
664 .name =
"crc_ieee_buf",
665 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
666 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
667 .mem_layout =
"scalar",
668 .buf_content =
"uint32_t crc_ieee[256];",
672 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
673 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
674 .mem_layout =
"scalar",
675 .buf_content =
"int16_t quant_table[MAX_QUANT_TABLES]"
676 "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
686 .
name =
"slice_data_buf",
687 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
688 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
689 .buf_content =
"SliceContext slice_ctx",
690 .buf_elems =
f->max_slice_count,
693 .name =
"slice_offsets_buf",
694 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
695 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
696 .mem_quali =
"readonly",
697 .buf_content =
"u32vec2 slice_offsets",
698 .buf_elems = 2*
f->max_slice_count,
701 .name =
"slice_status_buf",
702 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
703 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
704 .mem_quali =
"writeonly",
705 .buf_content =
"uint32_t slice_status",
706 .buf_elems = 2*
f->max_slice_count,
735 void *spv_opaque =
NULL;
736 int wg_dim =
FFMIN(
s->props.properties.limits.maxComputeWorkGroupSize[0], 1024);
739 VK_SHADER_STAGE_COMPUTE_BIT,
740 (
const char *[]) {
"GL_EXT_buffer_reference",
741 "GL_EXT_buffer_reference2" }, 2,
751 GLSLC(0,
layout(push_constant, scalar) uniform pushConstants { );
753 GLSLC(1, u8buf slice_state; );
754 GLSLC(1, uint plane_state_size; );
755 GLSLC(1, uint8_t codec_planes; );
756 GLSLC(1, uint8_t key_frame; );
758 GLSLC(1, uint8_t micro_version; );
759 GLSLC(1, uint8_t padding[1]; );
762 VK_SHADER_STAGE_COMPUTE_BIT);
770 .
name =
"rangecoder_static_buf",
771 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
772 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
773 .mem_layout =
"scalar",
774 .buf_content =
"uint8_t zero_one_state[512];",
778 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
779 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
780 .mem_layout =
"scalar",
781 .buf_content =
"int16_t quant_table[MAX_QUANT_TABLES]"
782 "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
793 .
name =
"slice_data_buf",
794 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
795 .mem_quali =
"readonly",
796 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
797 .buf_content =
"SliceContext slice_ctx",
798 .buf_elems =
f->max_slice_count,
830 void *spv_opaque =
NULL;
833 s->driver_props.driverID == VK_DRIVER_ID_MESA_RADV;
836 VK_SHADER_STAGE_COMPUTE_BIT,
837 (
const char *[]) {
"GL_EXT_buffer_reference",
838 "GL_EXT_buffer_reference2" }, 2,
848 if (use_cached_reader)
862 .
name =
"rangecoder_static_buf",
863 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
864 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
865 .mem_layout =
"scalar",
866 .buf_content =
"uint8_t zero_one_state[512];",
870 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
871 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
872 .mem_layout =
"scalar",
873 .buf_content =
"int16_t quant_table[MAX_QUANT_TABLES]"
874 "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
886 .
name =
"slice_data_buf",
887 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
888 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
889 .buf_content =
"SliceContext slice_ctx",
890 .buf_elems =
f->max_slice_count,
894 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
899 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
902 .name =
"slice_status_buf",
903 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
904 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
905 .mem_quali =
"writeonly",
906 .buf_content =
"uint32_t slice_status",
907 .buf_elems = 2*
f->max_slice_count,
911 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
915 .mem_quali =
"writeonly",
917 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
952 frames_ctx->
width =
s->frames->width;
955 vk_frames = frames_ctx->
hwctx;
956 vk_frames->
tiling = VK_IMAGE_TILING_OPTIMAL;
957 vk_frames->
img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
958 vk_frames->
usage = VK_IMAGE_USAGE_STORAGE_BIT |
959 VK_IMAGE_USAGE_TRANSFER_DST_BIT;
1002 if (
f->version < 3 ||
1003 (
f->version == 4 &&
f->micro_version > 3))
1006 spv = ff_vk_spirv_init();
1043 spv, &fv->
reset,
f->ac));
1070 &fv->
setup, 0, 0, 0,
1073 VK_FORMAT_UNDEFINED));
1075 &fv->
setup, 0, 1, 0,
1078 VK_FORMAT_UNDEFINED));
1085 VK_FORMAT_UNDEFINED));
1090 VK_FORMAT_UNDEFINED));
1110 if (!(slice_status->
flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
1111 VkMappedMemoryRange invalidate_data = {
1112 .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
1113 .memory = slice_status->
mem,
1115 .size = 2*fp->
slice_num*
sizeof(uint32_t),
1118 1, &invalidate_data);
1122 uint32_t crc_res = 0;
1137 .
p.
name =
"ffv1_vulkan",