/* Compile-time constant with the value 2.
 * NOTE(review): the name suggests a count of cached image lines for the RGB
 * path, but no use of this macro is visible in this excerpt -- confirm
 * against the call sites before relying on that reading. */
29 #define RGB_LINECACHE 2
41 .queue_flags = VK_QUEUE_COMPUTE_BIT,
108 GLSLC(0,
layout(push_constant, scalar) uniform pushConstants { );
109 GLSLC(1, u8buf slice_data; );
110 GLSLC(1, u8buf slice_state; );
112 GLSLC(1, ivec4 fmt_lut; );
113 GLSLC(1, uvec2 img_size; );
114 GLSLC(1, uvec2 chroma_shift; );
116 GLSLC(1, uint plane_state_size; );
117 GLSLC(1, uint32_t crcref; );
118 GLSLC(1,
int rct_offset; );
120 GLSLC(1, uint8_t extend_lookup[8]; );
121 GLSLC(1, uint8_t bits_per_raw_sample; );
122 GLSLC(1, uint8_t quant_table_count; );
124 GLSLC(1, uint8_t micro_version; );
125 GLSLC(1, uint8_t key_frame; );
127 GLSLC(1, uint8_t codec_planes; );
128 GLSLC(1, uint8_t color_planes; );
129 GLSLC(1, uint8_t transparency; );
130 GLSLC(1, uint8_t planar_rgb; );
131 GLSLC(1, uint8_t colorspace; );
132 GLSLC(1, uint8_t ec; );
133 GLSLC(1, uint8_t golomb; );
134 GLSLC(1, uint8_t check_crc; );
135 GLSLC(1, uint8_t padding[3]; );
138 VK_SHADER_STAGE_COMPUTE_BIT);
165 for (
int i = 0;
i <
f->quant_table_count;
i++)
166 max_contexts =
FFMAX(
f->context_count[
i], max_contexts);
187 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
188 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
194 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
195 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
197 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
210 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
211 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
212 NULL, 2*
f->slice_count*
sizeof(uint32_t),
213 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
214 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
221 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
222 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
223 NULL, 2*
f->slice_count*
sizeof(uint32_t),
224 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
225 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
262 if (slices_buf && slices_buf->host_ref) {
264 data - slices_buf->mapped_mem);
300 int bits =
f->avctx->bits_per_raw_sample > 0 ?
f->avctx->bits_per_raw_sample : 8;
319 VkImageView *decode_dst_view = is_rgb ? rct_image_views : vp->
view.
out;
321 VkImageMemoryBarrier2 img_bar[37];
323 VkBufferMemoryBarrier2 buf_bar[8];
331 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
332 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
343 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
344 VK_PIPELINE_STAGE_2_CLEAR_BIT));
346 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
347 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
348 VK_ACCESS_2_TRANSFER_WRITE_BIT,
349 VK_IMAGE_LAYOUT_GENERAL,
350 VK_QUEUE_FAMILY_IGNORED);
359 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT));
372 ALL_COMMANDS_BIT, NONE_KHR, NONE_KHR,
373 COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT,
374 SHADER_STORAGE_WRITE_BIT,
378 COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT,
379 SHADER_STORAGE_WRITE_BIT,
380 COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT,
381 SHADER_STORAGE_WRITE_BIT,
383 vk->CmdPipelineBarrier2(exec->
buf, &(VkDependencyInfo) {
384 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
385 .pImageMemoryBarriers = img_bar,
386 .imageMemoryBarrierCount = nb_img_bar,
387 .pBufferMemoryBarriers = buf_bar,
388 .bufferMemoryBarrierCount = nb_buf_bar,
398 VK_FORMAT_UNDEFINED);
402 0, 2*
f->slice_count*
sizeof(uint32_t),
403 VK_FORMAT_UNDEFINED);
407 0, 2*
f->slice_count*
sizeof(uint32_t),
408 VK_FORMAT_UNDEFINED);
415 .img_size[0] =
f->picture.f->width,
416 .img_size[1] =
f->picture.f->height,
417 .chroma_shift[0] =
f->chroma_h_shift,
418 .chroma_shift[1] =
f->chroma_v_shift,
422 .rct_offset = 1 <<
bits,
424 .bits_per_raw_sample =
bits,
425 .quant_table_count =
f->quant_table_count,
426 .version =
f->version,
427 .micro_version =
f->micro_version,
430 .codec_planes =
f->plane_count,
431 .color_planes = color_planes,
432 .transparency =
f->transparency,
435 .colorspace =
f->colorspace,
440 for (
int i = 0;
i <
f->quant_table_count;
i++)
442 (
f->quant_tables[
i][4][127] != 0);
448 memcpy(pd.
fmt_lut, (
int [4]) { 2, 1, 0, 3 }, 4*
sizeof(
int));
453 VK_SHADER_STAGE_COMPUTE_BIT,
456 vk->CmdDispatch(exec->
buf,
f->num_h_slices,
f->num_v_slices, 1);
460 for (
int i = 0;
i < color_planes;
i++)
461 vk->CmdClearColorImage(exec->
buf, vkf->
img[
i], VK_IMAGE_LAYOUT_GENERAL,
462 &((VkClearColorValue) { 0 }),
463 1, &((VkImageSubresourceRange) {
464 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
471 reset_shader = &fv->
reset;
476 VK_FORMAT_UNDEFINED);
483 .codec_planes =
f->plane_count,
485 .version =
f->version,
486 .micro_version =
f->micro_version,
488 for (
int i = 0;
i <
f->quant_table_count;
i++)
492 VK_SHADER_STAGE_COMPUTE_BIT,
493 0,
sizeof(pd_reset), &pd_reset);
497 COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT,
498 SHADER_STORAGE_WRITE_BIT,
499 COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT, NONE_KHR,
504 ALL_COMMANDS_BIT, NONE_KHR, NONE_KHR,
505 COMPUTE_SHADER_BIT, SHADER_STORAGE_WRITE_BIT, NONE_KHR,
509 COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT,
510 SHADER_STORAGE_WRITE_BIT,
511 COMPUTE_SHADER_BIT, SHADER_STORAGE_WRITE_BIT, NONE_KHR,
513 vk->CmdPipelineBarrier2(exec->
buf, &(VkDependencyInfo) {
514 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
515 .pImageMemoryBarriers = img_bar,
516 .imageMemoryBarrierCount = nb_img_bar,
517 .pBufferMemoryBarriers = buf_bar,
518 .bufferMemoryBarrierCount = nb_buf_bar,
523 vk->CmdDispatch(exec->
buf,
f->num_h_slices,
f->num_v_slices,
527 decode_shader = &fv->
decode;
532 VK_FORMAT_UNDEFINED);
534 decode_dst, decode_dst_view,
536 VK_IMAGE_LAYOUT_GENERAL,
541 0, 2*
f->slice_count*
sizeof(uint32_t),
542 VK_FORMAT_UNDEFINED);
547 VK_IMAGE_LAYOUT_GENERAL,
552 VK_SHADER_STAGE_COMPUTE_BIT,
557 COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT, NONE_KHR,
558 COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT,
559 SHADER_STORAGE_WRITE_BIT,
562 COMPUTE_SHADER_BIT, SHADER_STORAGE_WRITE_BIT, NONE_KHR,
563 COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT,
564 SHADER_STORAGE_WRITE_BIT,
568 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
569 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
570 VK_ACCESS_SHADER_WRITE_BIT |
571 (!is_rgb ? VK_ACCESS_SHADER_READ_BIT : 0),
572 VK_IMAGE_LAYOUT_GENERAL,
573 VK_QUEUE_FAMILY_IGNORED);
576 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
577 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
578 VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
579 VK_IMAGE_LAYOUT_GENERAL,
580 VK_QUEUE_FAMILY_IGNORED);
582 vk->CmdPipelineBarrier2(exec->
buf, &(VkDependencyInfo) {
583 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
584 .pImageMemoryBarriers = img_bar,
585 .imageMemoryBarrierCount = nb_img_bar,
586 .pBufferMemoryBarriers = buf_bar,
587 .bufferMemoryBarrierCount = nb_buf_bar,
592 vk->CmdDispatch(exec->
buf,
f->num_h_slices,
f->num_v_slices, 1);
607 int smp_bits = use32bit ? 32 : 16;
615 GLSLF(0, #define
TYPE int%i_t ,smp_bits);
616 GLSLF(0, #define VTYPE2
i%ivec2 ,smp_bits);
617 GLSLF(0, #define VTYPE3
i%ivec3 ,smp_bits);
631 void *spv_opaque =
NULL;
634 VK_SHADER_STAGE_COMPUTE_BIT,
635 (
const char *[]) {
"GL_EXT_buffer_reference",
636 "GL_EXT_buffer_reference2" }, 2,
651 .
name =
"rangecoder_static_buf",
652 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
653 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
654 .mem_layout =
"scalar",
655 .buf_content =
"uint8_t zero_one_state[512];",
658 .name =
"crc_ieee_buf",
659 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
660 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
661 .mem_layout =
"scalar",
662 .buf_content =
"uint32_t crc_ieee[256];",
666 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
667 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
668 .mem_layout =
"scalar",
669 .buf_content =
"int16_t quant_table[MAX_QUANT_TABLES]"
670 "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
680 .
name =
"slice_data_buf",
681 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
682 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
683 .buf_content =
"SliceContext slice_ctx",
684 .buf_elems =
f->max_slice_count,
687 .name =
"slice_offsets_buf",
688 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
689 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
690 .mem_quali =
"readonly",
691 .buf_content =
"u32vec2 slice_offsets",
692 .buf_elems = 2*
f->max_slice_count,
695 .name =
"slice_status_buf",
696 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
697 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
698 .mem_quali =
"writeonly",
699 .buf_content =
"uint32_t slice_status",
700 .buf_elems = 2*
f->max_slice_count,
729 void *spv_opaque =
NULL;
730 int wg_dim =
FFMIN(
s->props.properties.limits.maxComputeWorkGroupSize[0], 1024);
733 VK_SHADER_STAGE_COMPUTE_BIT,
734 (
const char *[]) {
"GL_EXT_buffer_reference",
735 "GL_EXT_buffer_reference2" }, 2,
745 GLSLC(0,
layout(push_constant, scalar) uniform pushConstants { );
747 GLSLC(1, u8buf slice_state; );
748 GLSLC(1, uint plane_state_size; );
749 GLSLC(1, uint8_t codec_planes; );
750 GLSLC(1, uint8_t key_frame; );
752 GLSLC(1, uint8_t micro_version; );
753 GLSLC(1, uint8_t padding[1]; );
756 VK_SHADER_STAGE_COMPUTE_BIT);
764 .
name =
"rangecoder_static_buf",
765 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
766 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
767 .mem_layout =
"scalar",
768 .buf_content =
"uint8_t zero_one_state[512];",
772 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
773 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
774 .mem_layout =
"scalar",
775 .buf_content =
"int16_t quant_table[MAX_QUANT_TABLES]"
776 "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
787 .
name =
"slice_data_buf",
788 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
789 .mem_quali =
"readonly",
790 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
791 .buf_content =
"SliceContext slice_ctx",
792 .buf_elems =
f->max_slice_count,
824 void *spv_opaque =
NULL;
827 s->driver_props.driverID == VK_DRIVER_ID_MESA_RADV;
830 VK_SHADER_STAGE_COMPUTE_BIT,
831 (
const char *[]) {
"GL_EXT_buffer_reference",
832 "GL_EXT_buffer_reference2" }, 2,
842 if (use_cached_reader)
856 .
name =
"rangecoder_static_buf",
857 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
858 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
859 .mem_layout =
"scalar",
860 .buf_content =
"uint8_t zero_one_state[512];",
864 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
865 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
866 .mem_layout =
"scalar",
867 .buf_content =
"int16_t quant_table[MAX_QUANT_TABLES]"
868 "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
880 .
name =
"slice_data_buf",
881 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
882 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
883 .buf_content =
"SliceContext slice_ctx",
884 .buf_elems =
f->max_slice_count,
888 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
893 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
896 .name =
"slice_status_buf",
897 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
898 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
899 .mem_quali =
"writeonly",
900 .buf_content =
"uint32_t slice_status",
901 .buf_elems = 2*
f->max_slice_count,
905 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
909 .mem_quali =
"writeonly",
911 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
946 frames_ctx->
width =
s->frames->width;
949 vk_frames = frames_ctx->
hwctx;
950 vk_frames->
tiling = VK_IMAGE_TILING_OPTIMAL;
951 vk_frames->
img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
952 vk_frames->
usage = VK_IMAGE_USAGE_STORAGE_BIT |
953 VK_IMAGE_USAGE_TRANSFER_DST_BIT;
996 if (
f->version < 3 ||
997 (
f->version == 4 &&
f->micro_version > 3))
1000 spv = ff_vk_spirv_init();
1037 spv, &fv->
reset,
f->ac));
1064 &fv->
setup, 0, 0, 0,
1067 VK_FORMAT_UNDEFINED));
1069 &fv->
setup, 0, 1, 0,
1072 VK_FORMAT_UNDEFINED));
1079 VK_FORMAT_UNDEFINED));
1084 VK_FORMAT_UNDEFINED));
1104 if (!(slice_status->
flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
1105 VkMappedMemoryRange invalidate_data = {
1106 .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
1107 .memory = slice_status->
mem,
1109 .size = 2*fp->
slice_num*
sizeof(uint32_t),
1112 1, &invalidate_data);
1116 uint32_t crc_res = 0;
1131 .
p.
name =
"ffv1_vulkan",